Diffstat (limited to 'drivers/gpu/drm')
817 files changed, 24272 insertions, 9350 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 930de203d533..2d0fea87af79 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -84,7 +84,8 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ - nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o + nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \ + cyan_skillfish_reg_init.o # add DF block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ef3af170dda4..17848ce65d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -819,6 +819,20 @@ struct amdgpu_ip_map_info { uint32_t mask); }; +enum amdgpu_uid_type { + AMDGPU_UID_TYPE_XCD, + AMDGPU_UID_TYPE_AID, + AMDGPU_UID_TYPE_SOC, + AMDGPU_UID_TYPE_MAX +}; + +#define AMDGPU_UID_INST_MAX 8 /* max number of instances for each UID type */ + +struct amdgpu_uid { + uint64_t uid[AMDGPU_UID_TYPE_MAX][AMDGPU_UID_INST_MAX]; + struct amdgpu_device *adev; +}; + struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; @@ -896,6 +910,9 @@ struct amdgpu_pcie_reset_ctx { bool in_link_reset; bool occurs_dpc; bool audio_suspended; + struct pci_dev *swus; + struct pci_saved_state *swus_pcistate; + struct pci_saved_state *swds_pcistate; }; /* @@ -929,12 +946,6 @@ enum amdgpu_enforce_isolation_mode { AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3, }; - -/* - * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. - */ -#define AMDGPU_HAS_VRAM(_adev) ((_adev)->gmc.real_vram_size) - struct amdgpu_device { struct device *dev; struct pci_dev *pdev; @@ -1138,9 +1149,6 @@ struct amdgpu_device { /* for userq and VM fences */ struct amdgpu_seq64 seq64; - /* KFD */ - struct amdgpu_kfd_dev kfd; - /* UMC */ struct amdgpu_umc umc; @@ -1302,6 +1310,12 @@ struct amdgpu_device { struct list_head userq_mgr_list; struct mutex userq_mutex; bool userq_halt_for_enforce_isolation; + struct amdgpu_uid *uid_info; + + /* KFD + * Must be last --ends in a flexible-array member. 
+ */ + struct amdgpu_kfd_dev kfd; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1785,4 +1799,9 @@ static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev) return 0; } +void amdgpu_device_set_uid(struct amdgpu_uid *uid_info, + enum amdgpu_uid_type type, uint8_t inst, + uint64_t uid); +uint64_t amdgpu_device_get_uid(struct amdgpu_uid *uid_info, + enum amdgpu_uid_type type, uint8_t inst); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index cbc40cad581b..9b3180449150 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -76,6 +76,7 @@ static void aca_banks_release(struct aca_banks *banks) list_for_each_entry_safe(node, tmp, &banks->list, node) { list_del(&node->node); kvfree(node); + banks->nr_banks--; } } @@ -130,6 +131,27 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n"); } +static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type) +{ + + struct aca_hwip *hwip; + int hwid, mcatype; + u64 ipid; + + if (!bank || type == ACA_HWIP_TYPE_UNKNOW) + return false; + + hwip = &aca_hwid_mcatypes[type]; + if (!hwip->hwid) + return false; + + ipid = bank->regs[ACA_REG_IDX_IPID]; + hwid = ACA_REG__IPID__HARDWAREID(ipid); + mcatype = ACA_REG__IPID__MCATYPE(ipid); + + return hwip->hwid == hwid && hwip->mcatype == mcatype; +} + static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, int start, int count, struct aca_banks *banks, struct ras_query_context *qctx) @@ -168,6 +190,15 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ bank.smu_err_type = type; + /* + * Poison being consumed when injecting a UE while running background workloads, + * which are unexpected. 
+ */ + if (type == ACA_SMU_TYPE_UE && + ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) && + !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC)) + continue; + aca_smu_bank_dump(adev, i, count, &bank, qctx); ret = aca_banks_add_bank(banks, &bank); @@ -178,27 +209,6 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ return 0; } -static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type) -{ - - struct aca_hwip *hwip; - int hwid, mcatype; - u64 ipid; - - if (!bank || type == ACA_HWIP_TYPE_UNKNOW) - return false; - - hwip = &aca_hwid_mcatypes[type]; - if (!hwip->hwid) - return false; - - ipid = bank->regs[ACA_REG_IDX_IPID]; - hwid = ACA_REG__IPID__HARDWAREID(ipid); - mcatype = ACA_REG__IPID__MCATYPE(ipid); - - return hwip->hwid == hwid && hwip->mcatype == mcatype; -} - static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) { const struct aca_bank_ops *bank_ops = handle->bank_ops; @@ -229,6 +239,7 @@ static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_ mutex_lock(&aerr->lock); list_add_tail(&bank_error->node, &aerr->list); + aerr->nr_errors++; mutex_unlock(&aerr->lock); return bank_error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 33eb4826b58b..127927b16ee2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -107,11 +107,13 @@ struct amdgpu_kfd_dev { bool init_complete; struct work_struct reset_work; - /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ - struct dev_pagemap pgmap; - /* Client for KFD BO GEM handle allocations */ struct drm_client_dev client; + + /* HMM page migration MEMORY_DEVICE_PRIVATE mapping + * Must be last --ends in a flexible-array member. + */ + struct dev_pagemap pgmap; }; enum kgd_engine_type { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 260165bbe373..c3b34a410375 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, spin_lock(&kfd_mem_limit.mem_limit_lock); if (kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit) + kfd_mem_limit.max_system_mem_limit) { pr_debug("Set no_system_mem_limit=1 if using shared memory\n"); + if (!no_system_mem_limit) { + ret = -ENOMEM; + goto release; + } + } - if ((kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || - (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > - kfd_mem_limit.max_ttm_mem_limit) || - (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > - vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) { + if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit) { ret = -ENOMEM; goto release; } + /*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with + * carve out < gtt. 
In that case, VRAM allocation will go to gtt domain, skip + * VRAM check since ttm_mem_limit check already cover this allocation + */ + + if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) { + uint64_t vram_available = + vram_size - reserved_for_pt - reserved_for_ras - + atomic64_read(&adev->vram_pin_size); + if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) { + ret = -ENOMEM; + goto release; + } + } + /* Update memory accounting by decreasing available system * memory, TTM memory and GPU memory as computed above */ @@ -494,7 +510,8 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL); } -static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) +static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct kgd_mem *mem) { uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_DEFAULT; @@ -504,7 +521,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - return amdgpu_gem_va_map_flags(adev, mapping_flags); + return mapping_flags; } /** @@ -961,7 +978,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, goto unwind; } attachment[i]->va = va; - attachment[i]->pte_flags = get_pte_flags(adev, mem); + attachment[i]->pte_flags = get_pte_flags(adev, vm, mem); attachment[i]->adev = adev; list_add(&attachment[i]->list, &mem->attachments); @@ -1626,11 +1643,15 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, uint64_t vram_available, system_mem_available, ttm_mem_available; spin_lock(&kfd_mem_limit.mem_limit_lock); - vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) - - adev->kfd.vram_used_aligned[xcp_id] - - atomic64_read(&adev->vram_pin_size) - - reserved_for_pt - - reserved_for_ras; + if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu) + vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) + - adev->kfd.vram_used_aligned[xcp_id]; + else + vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) + - adev->kfd.vram_used_aligned[xcp_id] + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt + - reserved_for_ras; if (adev->apu_prefer_gtt) { system_mem_available = no_system_mem_limit ? @@ -2969,9 +2990,22 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * struct amdgpu_device *adev = amdgpu_ttm_adev( peer_vm->root.bo->tbo.bdev); + struct amdgpu_fpriv *fpriv = + container_of(peer_vm, struct amdgpu_fpriv, vm); + + ret = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + if (ret) { + dev_dbg(adev->dev, + "Memory eviction: handle PRT moved failed, pid %8d. Try again.\n", + pid_nr(process_info->pid)); + goto validate_map_fail; + } + ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket); if (ret) { - pr_debug("Memory eviction: handle moved failed. Try again\n"); + dev_dbg(adev->dev, + "Memory eviction: handle moved failed, pid %8d. 
Try again.\n", + pid_nr(process_info->pid)); goto validate_map_fail; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index e476e45b996a..9dfdc08cc887 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1816,16 +1816,43 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, return sysfs_emit(buf, "%s\n", ctx->vbios_pn); } +static ssize_t amdgpu_atombios_get_vbios_build(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct atom_context *ctx = adev->mode_info.atom_context; + + return sysfs_emit(buf, "%s\n", ctx->build_num); +} + static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, NULL); +static DEVICE_ATTR(vbios_build, 0444, amdgpu_atombios_get_vbios_build, NULL); static struct attribute *amdgpu_vbios_version_attrs[] = { - &dev_attr_vbios_version.attr, - NULL + &dev_attr_vbios_version.attr, &dev_attr_vbios_build.attr, NULL }; +static umode_t amdgpu_vbios_version_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, + int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct atom_context *ctx = adev->mode_info.atom_context; + + if (attr == &dev_attr_vbios_build.attr && !strlen(ctx->build_num)) + return 0; + + return attr->mode; +} + const struct attribute_group amdgpu_vbios_version_attr_group = { - .attrs = amdgpu_vbios_version_attrs + .attrs = amdgpu_vbios_version_attrs, + .is_visible = amdgpu_vbios_version_attrs_is_visible, }; int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 5e375e9c4f5d..bf38fc69c1cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1195,29 +1195,69 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector) amdgpu_connector->use_digital = true; } +/** + * amdgpu_max_hdmi_pixel_clock - Return max supported HDMI (TMDS) pixel clock + * @adev: pointer to amdgpu_device + * + * Return: maximum supported HDMI (TMDS) pixel clock in KHz. + */ +static int amdgpu_max_hdmi_pixel_clock(const struct amdgpu_device *adev) +{ + if (adev->asic_type >= CHIP_POLARIS10) + return 600000; + else if (adev->asic_type >= CHIP_TONGA) + return 300000; + else + return 297000; +} + +/** + * amdgpu_connector_dvi_mode_valid - Validate a mode on DVI/HDMI connectors + * @connector: DRM connector to validate the mode on + * @mode: display mode to validate + * + * Validate the given display mode on DVI and HDMI connectors, including + * analog signals on DVI-I. + * + * Return: drm_mode_status indicating whether the mode is valid. 
+ */ static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, const struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); + const int max_hdmi_pixel_clock = amdgpu_max_hdmi_pixel_clock(adev); + const int max_dvi_single_link_pixel_clock = 165000; + int max_digital_pixel_clock_khz; /* XXX check mode bandwidth */ - if (amdgpu_connector->use_digital && (mode->clock > 165000)) { - if ((amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I) || - (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) || - (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) { - return MODE_OK; - } else if (connector->display_info.is_hdmi) { - /* HDMI 1.3+ supports max clock of 340 Mhz */ - if (mode->clock > 340000) - return MODE_CLOCK_HIGH; - else - return MODE_OK; - } else { - return MODE_CLOCK_HIGH; + if (amdgpu_connector->use_digital) { + switch (amdgpu_connector->connector_object_id) { + case CONNECTOR_OBJECT_ID_HDMI_TYPE_A: + max_digital_pixel_clock_khz = max_hdmi_pixel_clock; + break; + case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I: + case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D: + max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock; + break; + case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I: + case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D: + case CONNECTOR_OBJECT_ID_HDMI_TYPE_B: + max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2; + break; } + + /* When the display EDID claims that it's an HDMI display, + * we use the HDMI encoder mode of the display HW, + * so we should verify against the max HDMI clock here. + */ + if (connector->display_info.is_hdmi) + max_digital_pixel_clock_khz = max_hdmi_pixel_clock; + + if (mode->clock > max_digital_pixel_clock_khz) + return MODE_CLOCK_HIGH; } /* check against the max pixel clock */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 25252231a68a..ef996493115f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -68,7 +68,6 @@ void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev, hdr->error_severity = sev; hdr->valid_bits.platform_id = 1; - hdr->valid_bits.partition_id = 1; hdr->valid_bits.timestamp = 1; amdgpu_cper_get_timestamp(&hdr->timestamp); @@ -174,7 +173,7 @@ int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev, struct cper_sec_nonstd_err *section; bool poison; - poison = (sev == CPER_SEV_NON_FATAL_CORRECTED) ? 
false : true; + poison = sev != CPER_SEV_NON_FATAL_CORRECTED; section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx)); section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr + NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); @@ -206,6 +205,7 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev { struct cper_sec_desc *section_desc; struct cper_sec_nonstd_err *section; + uint32_t socket_id; section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx)); section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr + @@ -219,11 +219,17 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev section->hdr.valid_bits.err_context_cnt = 1; section->info.error_type = RUNTIME; + section->info.valid_bits.ms_chk = 1; section->info.ms_chk_bits.err_type_valid = 1; + section->info.ms_chk_bits.err_type = 1; + section->info.ms_chk_bits.pcc = 1; section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH; section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump); /* Hardcoded Reg dump for bad page threshold CPER */ + socket_id = (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ? + adev->smuio.funcs->get_socket_id(adev) : + 0; section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1; section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0; section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137; @@ -234,8 +240,8 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0; section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2; section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff; - section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = 0x0; - section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x96; + section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = (socket_id / 4) & 0x01; + section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x096 | (((socket_id % 4) & 0x3) << 12); section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0; section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d3f220be2ef9..2ac9729e4c86 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -396,7 +396,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, chunk_ib->ib_bytes : 0, AMDGPU_IB_POOL_DELAYED, ib); if (r) { - DRM_ERROR("Failed to get ib !\n"); + drm_err(adev_to_drm(p->adev), "Failed to get ib !\n"); return r; } @@ -468,7 +468,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); if (r) { - DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n", handle, point, r); return r; } @@ -902,7 +902,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, sizeof(struct page *), GFP_KERNEL); if (!e->user_pages) { - DRM_ERROR("kvmalloc_array failure\n"); + drm_err(adev_to_drm(p->adev), "kvmalloc_array failure\n"); r = -ENOMEM; goto out_free_user_pages; } @@ -983,7 +983,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL, amdgpu_cs_bo_validate, p); if (r) { - DRM_ERROR("amdgpu_vm_validate() failed.\n"); + drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n"); goto out_free_user_pages; } @@ -1061,13 +1061,13 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK; r = amdgpu_cs_find_mapping(p, 
va_start, &aobj, &m); if (r) { - DRM_ERROR("IB va_start is invalid\n"); + drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n"); return r; } if ((va_start + ib->length_dw * 4) > (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n"); return -EINVAL; } @@ -1238,7 +1238,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); if (r) { if (r != -ERESTARTSYS) - DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); + drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n"); return r; } @@ -1451,7 +1451,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_init(&parser, adev, filp, data); if (r) { - DRM_ERROR_RATELIMITED("Failed to initialize parser %d!\n", r); + drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r); return r; } @@ -1466,9 +1466,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_bos(&parser, data); if (r) { if (r == -ENOMEM) - DRM_ERROR("Not enough memory for command submission!\n"); + drm_err(dev, "Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) - DRM_DEBUG("Failed to process the buffer list %d!\n", r); + drm_dbg(dev, "Failed to process the buffer list %d!\n", r); goto error_fini; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index dfb6cfd83760..02138aa55793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -88,8 +88,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, - AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE); + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0e6e2e2acf5b..a70651050acf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2136,12 +2136,14 @@ static int amdgpu_pt_info_read(struct seq_file *m, void *unused) struct drm_file *file; struct amdgpu_fpriv *fpriv; struct amdgpu_bo *root_bo; + struct amdgpu_device *adev; int r; file = m->private; if (!file) return -EINVAL; + adev = drm_to_adev(file->minor->dev); fpriv = file->driver_priv; if (!fpriv || !fpriv->vm.root.bo) return -ENODEV; @@ -2153,7 +2155,11 @@ static int amdgpu_pt_info_read(struct seq_file *m, void *unused) return -EINVAL; } - seq_printf(m, "gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(fpriv->vm.root.bo)); + seq_printf(m, "pd_address: 0x%llx\n", amdgpu_gmc_pd_addr(fpriv->vm.root.bo)); + seq_printf(m, "max_pfn: 0x%llx\n", adev->vm_manager.max_pfn); + seq_printf(m, "num_level: 0x%x\n", adev->vm_manager.num_level); + seq_printf(m, "block_size: 0x%x\n", adev->vm_manager.block_size); + seq_printf(m, "fragment_size: 0x%x\n", adev->vm_manager.fragment_size); amdgpu_bo_unreserve(root_bo); amdgpu_bo_unref(&root_bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 01d234cf8156..bdfb80377e6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -95,6 +95,7 @@ 
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 #define AMDGPU_MAX_RETRY_LIMIT 2 @@ -178,6 +179,8 @@ struct amdgpu_init_level amdgpu_init_minimal_xgmi = { BIT(AMD_IP_BLOCK_TYPE_PSP) }; +static void amdgpu_device_load_switch_state(struct amdgpu_device *adev); + static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev, enum amd_ip_block_type block) { @@ -2445,6 +2448,33 @@ int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, return 1; } +static const char *ip_block_names[] = { + [AMD_IP_BLOCK_TYPE_COMMON] = "common", + [AMD_IP_BLOCK_TYPE_GMC] = "gmc", + [AMD_IP_BLOCK_TYPE_IH] = "ih", + [AMD_IP_BLOCK_TYPE_SMC] = "smu", + [AMD_IP_BLOCK_TYPE_PSP] = "psp", + [AMD_IP_BLOCK_TYPE_DCE] = "dce", + [AMD_IP_BLOCK_TYPE_GFX] = "gfx", + [AMD_IP_BLOCK_TYPE_SDMA] = "sdma", + [AMD_IP_BLOCK_TYPE_UVD] = "uvd", + [AMD_IP_BLOCK_TYPE_VCE] = "vce", + [AMD_IP_BLOCK_TYPE_ACP] = "acp", + [AMD_IP_BLOCK_TYPE_VCN] = "vcn", + [AMD_IP_BLOCK_TYPE_MES] = "mes", + [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg", + [AMD_IP_BLOCK_TYPE_VPE] = "vpe", + [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm", + [AMD_IP_BLOCK_TYPE_ISP] = "isp", +}; + +static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type) +{ + int idx = (int)type; + + return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : "unknown"; +} + /** * amdgpu_device_ip_block_add * @@ -2473,8 +2503,13 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, break; } - dev_info(adev->dev, "detected ip block number %d <%s>\n", - adev->num_ip_blocks, ip_block_version->funcs->name); + dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n", + adev->num_ip_blocks, + ip_block_name(adev, ip_block_version->type), + ip_block_version->major, + ip_block_version->minor, + ip_block_version->rev, + ip_block_version->funcs->name); adev->ip_blocks[adev->num_ip_blocks].adev = adev; @@ -2595,6 +2630,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) return 0; chip_name = "navi12"; break; + case CHIP_CYAN_SKILLFISH: + chip_name = "cyan_skillfish"; + break; } err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, @@ -2674,6 +2712,24 @@ out: return err; } +static void amdgpu_uid_init(struct amdgpu_device *adev) +{ + /* Initialize the UID for the device */ + adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL); + if (!adev->uid_info) { + dev_warn(adev->dev, "Failed to allocate memory for UID\n"); + return; + } + adev->uid_info->adev = adev; +} + +static void amdgpu_uid_fini(struct amdgpu_device *adev) +{ + /* Free the UID memory */ + kfree(adev->uid_info); + adev->uid_info = NULL; +} + /** * amdgpu_device_ip_early_init - run early init for hardware IPs * @@ -2857,6 +2913,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (adev->gmc.xgmi.supported) amdgpu_xgmi_early_init(adev); + if (amdgpu_is_multi_aid(adev)) + amdgpu_uid_init(adev); ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); if (ip_block->status.valid != false) amdgpu_amdkfd_device_probe(adev); @@ -3389,7 +3447,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void) for (i = 0; i < mgpu_info.num_dgpu; i++) { gpu_ins = &(mgpu_info.gpu_ins[i]); adev = gpu_ins->adev; - if (!(adev->flags & AMD_IS_APU) && + if (!(adev->flags & AMD_IS_APU || 
amdgpu_sriov_multi_vf_mode(adev)) && !gpu_ins->mgpu_fan_enabled) { ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); if (ret) @@ -3648,6 +3706,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) } amdgpu_ras_fini(adev); + amdgpu_uid_fini(adev); return 0; } @@ -4992,7 +5051,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) adev->reset_domain = NULL; kfree(adev->pci_state); - + kfree(adev->pcie_reset_ctx.swds_pcistate); + kfree(adev->pcie_reset_ctx.swus_pcistate); } /** @@ -5701,7 +5761,7 @@ int amdgpu_device_link_reset(struct amdgpu_device *adev) dev_info(adev->dev, "GPU link reset\n"); - if (!adev->pcie_reset_ctx.occurs_dpc) + if (!amdgpu_reset_in_dpc(adev)) ret = amdgpu_dpm_link_reset(adev); if (ret) @@ -5830,6 +5890,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) amdgpu_set_init_level(tmp_adev, init_level); if (full_reset) { /* post card */ + amdgpu_reset_set_dpc_status(tmp_adev, false); amdgpu_ras_clear_err_state(tmp_adev); r = amdgpu_device_asic_init(tmp_adev); if (r) { @@ -6136,12 +6197,11 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) return ret; } -static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev, +static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev, struct list_head *device_list, struct amdgpu_hive_info *hive) { struct amdgpu_device *tmp_adev = NULL; - int r; /* * Build list of devices to reset. @@ -6153,7 +6213,7 @@ static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev, list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; - if (adev->pcie_reset_ctx.occurs_dpc) + if (amdgpu_reset_in_dpc(adev)) tmp_adev->pcie_reset_ctx.in_link_reset = true; } if (!list_is_first(&adev->reset_list, device_list)) @@ -6161,14 +6221,6 @@ static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev, } else { list_add_tail(&adev->reset_list, device_list); } - - if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { - r = amdgpu_device_health_check(device_list); - if (r) - return r; - } - - return 0; } static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev, @@ -6237,9 +6289,8 @@ static void amdgpu_device_halt_activities(struct amdgpu_device *adev, drm_client_dev_suspend(adev_to_drm(tmp_adev), false); /* disable ras on ALL IPs */ - if (!need_emergency_restart && - (!adev->pcie_reset_ctx.occurs_dpc) && - amdgpu_device_ip_need_full_reset(tmp_adev)) + if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) && + amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -6267,11 +6318,7 @@ static int amdgpu_device_asic_reset(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list, reset_list) { - if (adev->pcie_reset_ctx.occurs_dpc) - tmp_adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); - if (adev->pcie_reset_ctx.occurs_dpc) - tmp_adev->no_hw_access = false; /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -6449,8 +6496,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, emergency_restart(); } - dev_info(adev->dev, "GPU %s begin!\n", - need_emergency_restart ? "jobs stop":"reset"); + dev_info(adev->dev, "GPU %s begin!. Source: %d\n", + need_emergency_restart ? 
"jobs stop" : "reset", + reset_context->src); if (!amdgpu_sriov_vf(adev)) hive = amdgpu_get_xgmi_hive(adev); @@ -6461,8 +6509,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->hive = hive; INIT_LIST_HEAD(&device_list); - if (amdgpu_device_recovery_prepare(adev, &device_list, hive)) - goto end_reset; + amdgpu_device_recovery_prepare(adev, &device_list, hive); + + if (!amdgpu_sriov_vf(adev)) { + r = amdgpu_device_health_check(&device_list); + if (r) + goto end_reset; + } /* We need to lock reset domain only once both for XGMI and single device */ amdgpu_device_recovery_get_reset_lock(adev, &device_list); @@ -6890,11 +6943,6 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta dev_info(adev->dev, "PCI error: detected callback!!\n"); - if (!amdgpu_dpm_is_link_reset_supported(adev)) { - dev_warn(adev->dev, "No support for XGMI hive yet...\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - adev->pci_channel_state = state; switch (state) { @@ -6904,10 +6952,23 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta case pci_channel_io_frozen: /* Fatal error, prepare for slot reset */ dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state); + if (hive) { + /* Hive devices should be able to support FW based + * link reset on other devices, if not return. + */ + if (!amdgpu_dpm_is_link_reset_supported(adev)) { + dev_warn(adev->dev, + "No support for XGMI hive yet...\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + /* Set dpc status only if device is part of hive + * Non-hive devices should be able to recover after + * link reset. + */ + amdgpu_reset_set_dpc_status(adev, true); - if (hive) mutex_lock(&hive->hive_lock); - adev->pcie_reset_ctx.occurs_dpc = true; + } memset(&reset_context, 0, sizeof(reset_context)); INIT_LIST_HEAD(&device_list); @@ -6966,22 +7027,34 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) struct amdgpu_device *tmp_adev; struct amdgpu_hive_info *hive; struct list_head device_list; - int r = 0, i; + struct pci_dev *link_dev; + int r = 0, i, timeout; u32 memsize; - - /* PCI error slot reset should be skipped During RAS recovery */ - if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) && - amdgpu_ras_in_recovery(adev)) - return PCI_ERS_RESULT_RECOVERED; + u16 status; dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); - /* wait for asic to come out of reset */ - msleep(700); + if (adev->pcie_reset_ctx.swus) + link_dev = adev->pcie_reset_ctx.swus; + else + link_dev = adev->pdev; + /* wait for asic to come out of reset, timeout = 10s */ + timeout = 10000; + do { + usleep_range(10000, 10500); + r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status); + timeout -= 10; + } while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) && + (status != PCI_VENDOR_ID_AMD)); + + if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) { + r = -ETIME; + goto out; + } + amdgpu_device_load_switch_state(adev); /* Restore PCI confspace */ amdgpu_device_load_pci_state(pdev); @@ -7076,7 +7149,6 @@ void amdgpu_pci_resume(struct pci_dev *pdev) amdgpu_device_sched_resume(&device_list, NULL, NULL); amdgpu_device_gpu_resume(adev, &device_list, false); amdgpu_device_recovery_put_reset_lock(adev, &device_list); - adev->pcie_reset_ctx.occurs_dpc = false; if (hive) { mutex_unlock(&hive->hive_lock); @@ -7084,6 +7156,58 @@ void amdgpu_pci_resume(struct pci_dev *pdev) 
} } +static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev) +{ + struct pci_dev *parent = pci_upstream_bridge(adev->pdev); + int r; + + if (parent->vendor != PCI_VENDOR_ID_ATI) + return; + + /* If already saved, return */ + if (adev->pcie_reset_ctx.swus) + return; + /* Upstream bridge is ATI, assume it's SWUS/DS architecture */ + r = pci_save_state(parent); + if (r) + return; + adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(parent); + + parent = pci_upstream_bridge(parent); + r = pci_save_state(parent); + if (r) + return; + adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(parent); + + adev->pcie_reset_ctx.swus = parent; +} + +static void amdgpu_device_load_switch_state(struct amdgpu_device *adev) +{ + struct pci_dev *pdev; + int r; + + if (!adev->pcie_reset_ctx.swds_pcistate || + !adev->pcie_reset_ctx.swus_pcistate) + return; + + pdev = adev->pcie_reset_ctx.swus; + r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate); + if (!r) { + pci_restore_state(pdev); + } else { + dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r); + return; + } + + pdev = pci_upstream_bridge(adev->pdev); + r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate); + if (!r) + pci_restore_state(pdev); + else + dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r); +} + bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -7108,6 +7232,8 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) return false; } + amdgpu_device_cache_switch_state(adev); + return true; } @@ -7494,3 +7620,53 @@ ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset) size += sysfs_emit_at(buf, size, "\n"); return size; } + +void amdgpu_device_set_uid(struct amdgpu_uid *uid_info, + enum amdgpu_uid_type type, uint8_t inst, + uint64_t uid) +{ + if (!uid_info) + return; + + if (type >= AMDGPU_UID_TYPE_MAX) { + dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n", + type); + return; + } + + if (inst >= AMDGPU_UID_INST_MAX) { + dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n", + inst); + return; + } + + if (uid_info->uid[type][inst] != 0) { + dev_warn_once( + uid_info->adev->dev, + "Overwriting existing UID %llu for type %d instance %d\n", + uid_info->uid[type][inst], type, inst); + } + + uid_info->uid[type][inst] = uid; +} + +u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info, + enum amdgpu_uid_type type, uint8_t inst) +{ + if (!uid_info) + return 0; + + if (type >= AMDGPU_UID_TYPE_MAX) { + dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n", + type); + return 0; + } + + if (inst >= AMDGPU_UID_INST_MAX) { + dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n", + inst); + return 0; + } + + return uid_info->uid[type][inst]; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index efe0058b48ca..73401f0aeb34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2124,7 +2124,6 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 5): case IP_VERSION(11, 0, 9): case IP_VERSION(11, 0, 7): - case IP_VERSION(11, 0, 8): case IP_VERSION(11, 0, 11): case IP_VERSION(11, 0, 12): case IP_VERSION(11, 0, 13): @@ -2132,6 +2131,10 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 2): amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); break; + case IP_VERSION(11, 
0, 8): + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); @@ -2746,6 +2749,36 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 6, 0); adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0); break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { + r = amdgpu_discovery_reg_base_init(adev); + if (r) + return -EINVAL; + + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + } else { + cyan_skillfish_reg_base_init(adev); + adev->sdma.num_instances = 2; + adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3); + adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3); + adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1); + adev->ip_versions[HDP_HWIP][0] = IP_VERSION(5, 0, 1); + adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(5, 0, 1); + adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(5, 0, 1); + adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 5, 0); + adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(2, 1, 1); + adev->ip_versions[UMC_HWIP][0] = IP_VERSION(8, 1, 1); + adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 8); + adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 8); + adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 1); + adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 8); + adev->ip_versions[GC_HWIP][0] = IP_VERSION(10, 1, 3); + adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 0, 3); + } + break; default: r = amdgpu_discovery_reg_base_init(adev); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index ff98c87b2e0b..8561ad7f6180 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -285,6 +285,36 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return ret; } +static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int ret; + + /* + * Pin to keep buffer in place while it's vmap'ed. The actual + * domain is not that important as long as it's mapable. Using + * GTT and VRAM should be compatible with most use cases. 
+ */ + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM); + if (ret) + return ret; + ret = drm_gem_dmabuf_vmap(dma_buf, map); + if (ret) + amdgpu_bo_unpin(bo); + + return ret; +} + +static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + drm_gem_dmabuf_vunmap(dma_buf, map); + amdgpu_bo_unpin(bo); +} + const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_dma_buf_attach, .pin = amdgpu_dma_buf_pin, @@ -294,8 +324,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { .release = drm_gem_dmabuf_release, .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, - .vmap = drm_gem_dmabuf_vmap, - .vunmap = drm_gem_dmabuf_vunmap, + .vmap = amdgpu_dma_buf_vmap, + .vunmap = amdgpu_dma_buf_vunmap, }; /** @@ -313,11 +343,23 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, { struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); struct dma_buf *buf; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = true, + /* We opt to avoid OOM on system pages allocations */ + .gfp_retry_mayfail = true, + .allow_res_evict = false, + }; + int ret; if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) return ERR_PTR(-EPERM); + ret = ttm_bo_setup_export(&bo->tbo, &ctx); + if (ret) + return ERR_PTR(ret); + buf = drm_gem_prime_export(gobj, flags); if (!IS_ERR(buf)) buf->ops = &amdgpu_dmabuf_ops; @@ -514,7 +556,7 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, return false; if (drm_gem_is_imported(obj)) { - struct dma_buf *dma_buf = obj->dma_buf; + struct dma_buf *dma_buf = obj->import_attach->dmabuf; if (dma_buf->ops != &amdgpu_dmabuf_ops) /* No XGMI with non AMD GPUs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 395c6be901ce..ece251cbe8c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -886,7 +886,7 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); /** * DOC: dcdebugmask (uint) - * Override display features enabled. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h. + * Display debug options. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h. 
*/ MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))"); module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); @@ -2172,6 +2172,11 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, /* CYAN_SKILLFISH */ + {0x1002, 0x13DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x13F9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x13FA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x13FB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x13FC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, {0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, @@ -2597,6 +2602,7 @@ static int amdgpu_pmops_suspend(struct device *dev) else if (amdgpu_acpi_is_s3_active(adev)) adev->in_s3 = true; if (!adev->in_s0ix && !adev->in_s3) { +#if IS_ENABLED(CONFIG_SUSPEND) /* don't allow going deep first time followed by s2idle the next time */ if (adev->last_suspend_state != PM_SUSPEND_ON && adev->last_suspend_state != pm_suspend_target_state) { @@ -2604,11 +2610,14 @@ static int amdgpu_pmops_suspend(struct device *dev) pm_suspend_target_state); return -EINVAL; } +#endif return 0; } +#if IS_ENABLED(CONFIG_SUSPEND) /* cache the state last used for suspend */ adev->last_suspend_state = pm_suspend_target_state; +#endif return amdgpu_device_suspend(drm_dev, true); } @@ -2933,11 +2942,14 @@ static int amdgpu_drm_release(struct inode *inode, struct file *filp) { struct drm_file *file_priv = filp->private_data; struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + struct drm_device *dev = file_priv->minor->dev; + int idx; - if (fpriv) { + if (fpriv && drm_dev_enter(dev, &idx)) { fpriv->evf_mgr.fd_closing = true; amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + drm_dev_exit(idx); } return drm_release(inode, filp); @@ -2964,15 +2976,15 @@ out: } static const struct dev_pm_ops amdgpu_pm_ops = { - .prepare = amdgpu_pmops_prepare, - .complete = amdgpu_pmops_complete, - .suspend = amdgpu_pmops_suspend, - .suspend_noirq = amdgpu_pmops_suspend_noirq, - .resume = amdgpu_pmops_resume, - .freeze = amdgpu_pmops_freeze, - .thaw = amdgpu_pmops_thaw, - .poweroff = amdgpu_pmops_poweroff, - .restore = amdgpu_pmops_restore, + .prepare = pm_sleep_ptr(amdgpu_pmops_prepare), + .complete = pm_sleep_ptr(amdgpu_pmops_complete), + .suspend = pm_sleep_ptr(amdgpu_pmops_suspend), + .suspend_noirq = pm_sleep_ptr(amdgpu_pmops_suspend_noirq), + .resume = pm_sleep_ptr(amdgpu_pmops_resume), + .freeze = pm_sleep_ptr(amdgpu_pmops_freeze), + .thaw = pm_sleep_ptr(amdgpu_pmops_thaw), + .poweroff = pm_sleep_ptr(amdgpu_pmops_poweroff), + .restore = pm_sleep_ptr(amdgpu_pmops_restore), .runtime_suspend = amdgpu_pmops_runtime_suspend, .runtime_resume = amdgpu_pmops_runtime_resume, .runtime_idle = amdgpu_pmops_runtime_idle, @@ -3044,6 +3056,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct drm_driver amdgpu_kms_driver = 
{ @@ -3117,7 +3130,7 @@ static struct pci_driver amdgpu_kms_pci_driver = { .probe = amdgpu_pci_probe, .remove = amdgpu_pci_remove, .shutdown = amdgpu_pci_shutdown, - .driver.pm = &amdgpu_pm_ops, + .driver.pm = pm_ptr(&amdgpu_pm_ops), .err_handler = &amdgpu_pci_err_handler, .dev_groups = amdgpu_sysfs_groups, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 9e7506965cab..fd8cca241da6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -120,7 +120,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); if (!am_fence) return -ENOMEM; - am_fence->context = 0; } else { am_fence = af; } @@ -738,7 +737,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) } -/** +/* * Kernel queue reset handling * * The driver can reset individual queues for most engines, but those queues diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6626a6e64ff5..c049848a56b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -317,7 +317,8 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, */ if (!vm->is_compute_context || !vm->process_info) return 0; - if (!drm_gem_is_imported(obj) || !dma_buf_is_dynamic(obj->dma_buf)) + if (!drm_gem_is_imported(obj) || + !dma_buf_is_dynamic(obj->import_attach->dmabuf)) return 0; mutex_lock_nested(&vm->process_info->lock, 1); if (!WARN_ON(!vm->process_info->eviction_fence)) { @@ -442,15 +443,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, int r; /* reject invalid gem flags */ - if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | - AMDGPU_GEM_CREATE_EXPLICIT_SYNC | - AMDGPU_GEM_CREATE_ENCRYPTED | - AMDGPU_GEM_CREATE_GFX12_DCC | - AMDGPU_GEM_CREATE_DISCARDABLE)) + if (flags & ~AMDGPU_GEM_CREATE_SETTABLE_MASK) return -EINVAL; /* reject invalid gem domains */ @@ -790,36 +783,6 @@ error: return fence; } -/** - * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags - * - * @adev: amdgpu_device pointer - * @flags: GEM UAPI flags - * - * Returns the GEM UAPI flags mapped into hardware for the ASIC. 
- */ -uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT_FLAG(adev); - if (flags & AMDGPU_VM_PAGE_NOALLOC) - pte_flag |= AMDGPU_PTE_NOALLOC; - - if (adev->gmc.gmc_funcs->map_mtype) - pte_flag |= amdgpu_gmc_map_mtype(adev, - flags & AMDGPU_VM_MTYPE_MASK); - - return pte_flag; -} - int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -840,7 +803,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct dma_fence_chain *timeline_chain = NULL; struct dma_fence *fence; struct drm_exec exec; - uint64_t va_flags; uint64_t vm_size; int r = 0; @@ -944,10 +906,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, - va_flags); + args->flags); break; case AMDGPU_VA_OP_UNMAP: r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address); @@ -959,10 +920,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, - va_flags); + args->flags); break; default: break; @@ -996,17 +956,34 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_gem_object *gobj; struct amdgpu_vm_bo_base *base; struct amdgpu_bo *robj; + struct drm_exec exec; + struct amdgpu_fpriv *fpriv = filp->driver_priv; int r; + if (args->padding) + return -EINVAL; + gobj = drm_gem_object_lookup(filp, args->handle); if (!gobj) return -ENOENT; robj = gem_to_amdgpu_bo(gobj); - r = amdgpu_bo_reserve(robj, false); - if (unlikely(r)) - goto out; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_lock_obj(&exec, gobj); + drm_exec_retry_on_contention(&exec); + if (r) + goto out_exec; + + if (args->op == AMDGPU_GEM_OP_GET_MAPPING_INFO) { + r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (r) + goto out_exec; + } + } switch (args->op) { case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: { @@ -1017,7 +994,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, info.alignment = robj->tbo.page_alignment << PAGE_SHIFT; info.domains = robj->preferred_domains; info.domain_flags = robj->flags; - amdgpu_bo_unreserve(robj); + drm_exec_fini(&exec); if (copy_to_user(out, &info, sizeof(info))) r = -EFAULT; break; @@ -1026,20 +1003,17 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, if (drm_gem_is_imported(&robj->tbo.base) && args->value & AMDGPU_GEM_DOMAIN_VRAM) { r = -EINVAL; - amdgpu_bo_unreserve(robj); - break; + goto out_exec; } if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) { r = -EPERM; - amdgpu_bo_unreserve(robj); - break; + goto out_exec; } for (base = robj->vm_bo; base; base = base->next) if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev), amdgpu_ttm_adev(base->vm->root.bo->tbo.bdev))) { r = -EINVAL; - amdgpu_bo_unreserve(robj); - goto out; + goto out_exec; } @@ -1052,17 +1026,146 @@ int 
amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) amdgpu_vm_bo_invalidate(robj, true); + drm_exec_fini(&exec); + break; + case AMDGPU_GEM_OP_GET_MAPPING_INFO: { + struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_find(&fpriv->vm, robj); + struct drm_amdgpu_gem_vm_entry *vm_entries; + struct amdgpu_bo_va_mapping *mapping; + int num_mappings = 0; + /* + * num_entries is set as an input to the size of the user-allocated array of + * drm_amdgpu_gem_vm_entry stored at args->value. + * num_entries is sent back as output as the number of mappings the bo has. + * If that number is larger than the size of the array, the ioctl must + * be retried. + */ + vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL); + if (!vm_entries) + return -ENOMEM; + + amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) { + if (num_mappings < args->num_entries) { + vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE; + vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE; + vm_entries[num_mappings].offset = mapping->offset; + vm_entries[num_mappings].flags = mapping->flags; + } + num_mappings += 1; + } + + amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) { + if (num_mappings < args->num_entries) { + vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE; + vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE; + vm_entries[num_mappings].offset = mapping->offset; + vm_entries[num_mappings].flags = mapping->flags; + } + num_mappings += 1; + } - amdgpu_bo_unreserve(robj); + drm_exec_fini(&exec); + + if (num_mappings > 0 && num_mappings <= args->num_entries) + if (copy_to_user(u64_to_user_ptr(args->value), vm_entries, num_mappings * sizeof(*vm_entries))) + r = -EFAULT; + + args->num_entries = num_mappings; + + kvfree(vm_entries); break; + } default: - amdgpu_bo_unreserve(robj); + drm_exec_fini(&exec); r = -EINVAL; } -out: drm_gem_object_put(gobj); return r; +out_exec: + drm_exec_fini(&exec); + drm_gem_object_put(gobj); + return r; +} + +/** + * amdgpu_gem_list_handles_ioctl - get information about a process' buffer objects + * + * @dev: drm device pointer + * @data: drm_amdgpu_gem_list_handles + * @filp: drm file pointer + * + * num_entries is set as an input to the size of the entries array. + * num_entries is sent back as output as the number of bos in the process. + * If that number is larger than the size of the array, the ioctl must + * be retried. + * + * Returns: + * 0 for success, -errno for errors. 
+ */ +int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_amdgpu_gem_list_handles *args = data; + struct drm_amdgpu_gem_list_handles_entry *bo_entries; + struct drm_gem_object *gobj; + int id, ret = 0; + int bo_index = 0; + int num_bos = 0; + + spin_lock(&filp->table_lock); + idr_for_each_entry(&filp->object_idr, gobj, id) + num_bos += 1; + spin_unlock(&filp->table_lock); + + if (args->num_entries < num_bos) { + args->num_entries = num_bos; + return 0; + } + + if (num_bos == 0) { + args->num_entries = 0; + return 0; + } + + bo_entries = kvcalloc(num_bos, sizeof(*bo_entries), GFP_KERNEL); + if (!bo_entries) + return -ENOMEM; + + spin_lock(&filp->table_lock); + idr_for_each_entry(&filp->object_idr, gobj, id) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); + struct drm_amdgpu_gem_list_handles_entry *bo_entry; + + if (bo_index >= num_bos) { + ret = -EAGAIN; + break; + } + + bo_entry = &bo_entries[bo_index]; + + bo_entry->size = amdgpu_bo_size(bo); + bo_entry->alloc_flags = bo->flags & AMDGPU_GEM_CREATE_SETTABLE_MASK; + bo_entry->preferred_domains = bo->preferred_domains; + bo_entry->gem_handle = id; + bo_entry->alignment = bo->tbo.page_alignment; + + if (bo->tbo.base.import_attach) + bo_entry->flags |= AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT; + + bo_index += 1; + } + spin_unlock(&filp->table_lock); + + args->num_entries = bo_index; + + if (!ret) + if (copy_to_user(u64_to_user_ptr(args->entries), bo_entries, num_bos * sizeof(*bo_entries))) + ret = -EFAULT; + + kvfree(bo_entries); + + return ret; } static int amdgpu_gem_align_pitch(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index 3a8f57900a3a..b558336bc4c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -63,13 +63,28 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags); int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +#define AMDGPU_GEM_CREATE_SETTABLE_MASK (AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | \ + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | \ + AMDGPU_GEM_CREATE_CPU_GTT_USWC | \ + AMDGPU_GEM_CREATE_VRAM_CLEARED | \ + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | \ + AMDGPU_GEM_CREATE_EXPLICIT_SYNC | \ + AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE | \ + AMDGPU_GEM_CREATE_ENCRYPTED | \ + AMDGPU_GEM_CREATE_GFX12_DCC | \ + AMDGPU_GEM_CREATE_DISCARDABLE | \ + AMDGPU_GEM_CREATE_COHERENT | \ + AMDGPU_GEM_CREATE_UNCACHED | \ + AMDGPU_GEM_CREATE_EXT_COHERENT) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c80c8f543532..98aa99b314c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1474,7 +1474,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) owner = (void *)(unsigned long)atomic_inc_return(&counter); r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, - 64, 0, &job); + 64, 0, &job, + AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER); if (r) goto 
err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 97b562a79ea8..9dcf51991b5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -690,7 +690,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, - &job); + &job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB); if (r) goto error_alloc; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 397c6ccdb903..55097ca10738 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -154,15 +154,15 @@ struct amdgpu_gmc_funcs { unsigned pasid); /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); - /* map mtype to hardware flags */ - uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags); /* get the pde for a given mc addr */ void (*get_vm_pde)(struct amdgpu_device *adev, int level, u64 *dst, u64 *flags); - /* get the pte flags to use for a BO VA mapping */ + /* get the pte flags to use for PTEs */ void (*get_vm_pte)(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t *flags); + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, + uint64_t *pte_flags); /* override per-page pte flags */ void (*override_vm_pte_flags)(struct amdgpu_device *dev, struct amdgpu_vm *vm, @@ -356,9 +356,10 @@ struct amdgpu_gmc { #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) -#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) -#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) +#define amdgpu_gmc_get_vm_pte(adev, vm, bo, vm_flags, pte_flags) \ + ((adev)->gmc.gmc_funcs->get_vm_pte((adev), (vm), (bo), (vm_flags), \ + (pte_flags))) #define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \ (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 9b1c55115921..d020a890a0ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -209,11 +209,12 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, struct drm_sched_entity *entity, void *owner, size_t size, enum amdgpu_ib_pool_type pool_type, - struct amdgpu_job **job) + struct amdgpu_job **job, u64 k_job_id) { int r; - r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job, 0); + r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job, + k_job_id); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 2f302266662b..4a6487eb6cb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -44,6 +44,22 @@ struct amdgpu_fence; enum amdgpu_ib_pool_type; +/* Internal kernel job ids. (decreasing values, starting from U64_MAX). 
*/ +#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE (18446744073709551615ULL) +#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES (18446744073709551614ULL) +#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE (18446744073709551613ULL) +#define AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR (18446744073709551612ULL) +#define AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER (18446744073709551611ULL) +#define AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA (18446744073709551610ULL) +#define AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER (18446744073709551609ULL) +#define AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE (18446744073709551608ULL) +#define AMDGPU_KERNEL_JOB_ID_MOVE_BLIT (18446744073709551607ULL) +#define AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER (18446744073709551606ULL) +#define AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER (18446744073709551605ULL) +#define AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB (18446744073709551604ULL) +#define AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP (18446744073709551603ULL) +#define AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST (18446744073709551602ULL) + struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; @@ -96,7 +112,8 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, struct drm_sched_entity *entity, void *owner, size_t size, enum amdgpu_ib_pool_type pool_type, - struct amdgpu_job **job); + struct amdgpu_job **job, + u64 k_job_id); void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, struct amdgpu_bo *gws, struct amdgpu_bo *oa); void amdgpu_job_free_resources(struct amdgpu_job *job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 82d58ac7afb0..22da65f45226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -121,10 +121,12 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]); } - if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) + if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) { + mutex_lock(&adev->jpeg.jpeg_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, AMD_PG_STATE_GATE); - else + mutex_unlock(&adev->jpeg.jpeg_pg_lock); + } else schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); } @@ -194,7 +196,8 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -368,7 +371,7 @@ static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val) for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; - if (val & (1 << ((i * adev->jpeg.num_jpeg_rings) + j))) + if (val & (BIT_ULL(1) << ((i * adev->jpeg.num_jpeg_rings) + j))) ring->sched.ready = true; else ring->sched.ready = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 135598502c8d..5bf9be073cdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -191,6 +191,20 @@ int amdgpu_mes_init(struct amdgpu_device *adev) if (r) goto error_doorbell; + if (adev->mes.hung_queue_db_array_size) { + r = amdgpu_bo_create_kernel(adev, + adev->mes.hung_queue_db_array_size * sizeof(u32), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + 
&adev->mes.hung_queue_db_array_gpu_obj, + &adev->mes.hung_queue_db_array_gpu_addr, + &adev->mes.hung_queue_db_array_cpu_addr); + if (r) { + dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r); + goto error_doorbell; + } + } + return 0; error_doorbell: @@ -216,6 +230,10 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) { int i; + amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj, + &adev->mes.hung_queue_db_array_gpu_addr, + &adev->mes.hung_queue_db_array_cpu_addr); + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); @@ -366,6 +384,53 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, return r; } +int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev) +{ + return adev->mes.hung_queue_db_array_size; +} + +int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, + int queue_type, + bool detect_only, + unsigned int *hung_db_num, + u32 *hung_db_array) + +{ + struct mes_detect_and_reset_queue_input input; + u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr; + int r, i; + + if (!hung_db_num || !hung_db_array) + return -EINVAL; + + if ((queue_type != AMDGPU_RING_TYPE_GFX) && + (queue_type != AMDGPU_RING_TYPE_COMPUTE) && + (queue_type != AMDGPU_RING_TYPE_SDMA)) + return -EINVAL; + + /* Clear the doorbell array before detection */ + memset(adev->mes.hung_queue_db_array_cpu_addr, 0, + adev->mes.hung_queue_db_array_size * sizeof(u32)); + input.queue_type = queue_type; + input.detect_only = detect_only; + + r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes, + &input); + if (r) { + dev_err(adev->dev, "failed to detect and reset\n"); + } else { + *hung_db_num = 0; + for (i = 0; i < adev->mes.hung_queue_db_array_size; i++) { + if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { + hung_db_array[i] = db_array[i]; + *hung_db_num += 1; + } + } + } + + return r; +} + uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) { struct mes_misc_op_input op_input; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index c0d2c195fe2e..6b506fc72f58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -41,6 +41,7 @@ #define AMDGPU_MES_API_VERSION_MASK 0x00fff000 #define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 #define AMDGPU_MES_MSCRATCH_SIZE 0x40000 +#define AMDGPU_MES_INVALID_DB_OFFSET 0xffffffff enum amdgpu_mes_priority_level { AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, @@ -147,6 +148,10 @@ struct amdgpu_mes { uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES]; void *resource_1_addr[AMDGPU_MAX_MES_PIPES]; + int hung_queue_db_array_size; + struct amdgpu_bo *hung_queue_db_array_gpu_obj; + uint64_t hung_queue_db_array_gpu_addr; + void *hung_queue_db_array_cpu_addr; }; struct amdgpu_mes_gang { @@ -280,6 +285,18 @@ struct mes_reset_queue_input { bool is_kq; }; +struct mes_detect_and_reset_queue_input { + uint32_t queue_type; + bool detect_only; +}; + +struct mes_inv_tlbs_pasid_input { + uint32_t xcc_id; + uint16_t pasid; + uint8_t hub_id; + uint8_t flush_type; +}; + enum mes_misc_opcode { MES_MISC_OP_WRITE_REG, MES_MISC_OP_READ_REG, @@ -367,6 +384,13 @@ struct amdgpu_mes_funcs { int (*reset_hw_queue)(struct amdgpu_mes *mes, struct mes_reset_queue_input *input); + + int (*detect_and_reset_hung_queues)(struct amdgpu_mes *mes, + struct mes_detect_and_reset_queue_input *input); + + + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes, + struct mes_inv_tlbs_pasid_input 
*input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -390,6 +414,13 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, unsigned int vmid, bool use_mmio); +int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev); +int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, + int queue_type, + bool detect_only, + unsigned int *hung_db_num, + u32 *hung_db_array); + uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t val); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 6da4f946cac0..20460cfd09bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -496,8 +496,6 @@ struct amdgpu_crtc { struct drm_connector *connector; /* for dpm */ u32 line_time; - u32 wm_low; - u32 wm_high; u32 lb_vblank_lead_lines; struct drm_display_mode hw_mode; /* for virtual dce */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 122a88294883..d18bade9c98f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1313,7 +1313,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (r) goto out; - r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true); + r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true, + AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); if (WARN_ON(r)) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c316920f3450..87523fcd4386 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -69,7 +69,7 @@ struct amdgpu_bo_va_mapping { uint64_t last; uint64_t __subtree_last; uint64_t offset; - uint64_t flags; + uint32_t flags; }; /* User space allocated BO in a VM */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 23484317a5fa..3696f48c233b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -448,7 +448,7 @@ static int psp_sw_init(struct amdgpu_ip_block *ip_block) psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) { dev_err(adev->dev, "Failed to allocate memory to command buffer!\n"); - ret = -ENOMEM; + return -ENOMEM; } adev->psp.xgmi_context.supports_extended_data = @@ -506,8 +506,7 @@ static int psp_sw_init(struct amdgpu_ip_block *ip_block) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); @@ -666,6 +665,10 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) return "FB_FW_RESERV_ADDR"; case GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR: return "FB_FW_RESERV_EXT_ADDR"; + case GFX_CMD_ID_SRIOV_SPATIAL_PART: + return "SPATIAL_PARTITION"; + case GFX_CMD_ID_FB_NPS_MODE: + return "NPS_MODE_CHANGE"; default: return "UNKNOWN CMD"; } @@ -877,9 +880,7 @@ static int psp_tmr_init(struct psp_context *psp) pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, - AMDGPU_HAS_VRAM(psp->adev) ? 
- AMDGPU_GEM_DOMAIN_VRAM : - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, pptr); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 540817e296da..7fe5b1940df8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -122,12 +122,15 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) /* typical ECC bad page rate is 1 bad page per 100MB VRAM */ #define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL) -#define MAX_UMC_POISON_POLLING_TIME_ASYNC 300 //ms +#define MAX_UMC_POISON_POLLING_TIME_ASYNC 10 #define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms #define MAX_FLUSH_RETIRE_DWORK_TIMES 100 +#define BYPASS_ALLOCATED_ADDRESS 0x0 +#define BYPASS_INITIALIZATION_ADDRESS 0x1 + enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, AMDGPU_RAS_RETIRE_PAGE_PENDING, @@ -136,10 +139,14 @@ enum amdgpu_ras_retire_page_reservation { atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); -static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, +static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, uint64_t addr); -static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, +static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr); + +static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev); +static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev); + #ifdef CONFIG_X86_MCE_AMD static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev); struct mce_notifier_adev_list { @@ -169,18 +176,16 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre struct eeprom_table_record err_rec; int ret; - if ((address >= adev->gmc.mc_vram_size) || - (address >= RAS_UMC_INJECT_ADDR_LIMIT)) { + ret = amdgpu_ras_check_bad_page(adev, address); + if (ret == -EINVAL) { dev_warn(adev->dev, - "RAS WARN: input address 0x%llx is invalid.\n", - address); + "RAS WARN: input address 0x%llx is invalid.\n", + address); return -EINVAL; - } - - if (amdgpu_ras_check_bad_page(adev, address)) { + } else if (ret == 1) { dev_warn(adev->dev, - "RAS WARN: 0x%llx has already been marked as bad page!\n", - address); + "RAS WARN: 0x%llx has already been marked as bad page!\n", + address); return 0; } @@ -207,6 +212,49 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre return 0; } +static int amdgpu_check_address_validity(struct amdgpu_device *adev, + uint64_t address, uint64_t flags) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_vram_block_info blk_info; + uint64_t page_pfns[32] = {0}; + int i, ret, count; + + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) + return 0; + + if ((address >= adev->gmc.mc_vram_size) || + (address >= RAS_UMC_INJECT_ADDR_LIMIT)) + return -EFAULT; + + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, + address, page_pfns, ARRAY_SIZE(page_pfns)); + if (count <= 0) + return -EPERM; + + for (i = 0; i < count; i++) { + memset(&blk_info, 0, sizeof(blk_info)); + ret = amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr, + page_pfns[i] << AMDGPU_GPU_PAGE_SHIFT, &blk_info); + if (!ret) { + /* The input address that needs to be checked is allocated by + * current calling process, so it is necessary to exclude + * the calling process. 
+ */ + if ((flags == BYPASS_ALLOCATED_ADDRESS) && + ((blk_info.task.pid != task_pid_nr(current)) || + strncmp(blk_info.task.comm, current->comm, TASK_COMM_LEN))) + return -EACCES; + else if ((flags == BYPASS_INITIALIZATION_ADDRESS) && + (blk_info.task.pid == con->init_task_pid) && + !strncmp(blk_info.task.comm, con->init_task_comm, TASK_COMM_LEN)) + return -EACCES; + } + } + + return 0; +} + static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -297,6 +345,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, op = 2; else if (strstr(str, "retire_page") != NULL) op = 3; + else if (strstr(str, "check_address") != NULL) + op = 4; else if (str[0] && str[1] && str[2] && str[3]) /* ascii string, but commands are not matched. */ return -EINVAL; @@ -311,6 +361,15 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, data->inject.address = address; return 0; + } else if (op == 4) { + if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) != 2 && + sscanf(str, "%*s %llu %llu", &address, &value) != 2) + return -EINVAL; + + data->op = op; + data->inject.address = address; + data->inject.value = value; + return 0; } if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) @@ -500,6 +559,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, return size; else return ret; + } else if (data.op == 4) { + ret = amdgpu_check_address_validity(adev, data.inject.address, data.inject.value); + return ret ? ret : size; } if (!amdgpu_ras_is_supported(adev, data.head.block)) @@ -513,22 +575,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, ret = amdgpu_ras_feature_enable(adev, &data.head, 1); break; case 2: - if ((data.inject.address >= adev->gmc.mc_vram_size && - adev->gmc.mc_vram_size) || - (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { - dev_warn(adev->dev, "RAS WARN: input address " - "0x%llx is invalid.", + /* umc ce/ue error injection for a bad page is not allowed */ + if (data.head.block == AMDGPU_RAS_BLOCK__UMC) + ret = amdgpu_ras_check_bad_page(adev, data.inject.address); + if (ret == -EINVAL) { + dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.", data.inject.address); - ret = -EINVAL; break; - } - - /* umc ce/ue error injection for a bad page is not allowed */ - if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) && - amdgpu_ras_check_bad_page(adev, data.inject.address)) { - dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has " - "already been marked as bad!\n", - data.inject.address); + } else if (ret == 1) { + dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n", + data.inject.address); break; } @@ -2566,18 +2622,26 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, goto out; } - *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); + *bps = kmalloc_array(data->count, sizeof(struct ras_badpage), GFP_KERNEL); if (!*bps) { ret = -ENOMEM; goto out; } for (; i < data->count; i++) { + if (!data->bps[i].ts) + continue; + (*bps)[i] = (struct ras_badpage){ .bp = data->bps[i].retired_page, .size = AMDGPU_GPU_PAGE_SIZE, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; + + if (amdgpu_ras_check_critical_address(adev, + data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + continue; + status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr, data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT); if (status == -EBUSY) @@ -2586,7 +2650,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, (*bps)[i].flags = 
AMDGPU_RAS_RETIRE_PAGE_FAULT; } - *count = data->count; + *count = con->bad_page_num; out: mutex_unlock(&con->recovery_lock); return ret; @@ -2722,7 +2786,7 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, unsigned int old_space = data->count + data->space_left; unsigned int new_space = old_space + pages; unsigned int align_space = ALIGN(new_space, 512); - void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); + void *bps = kmalloc_array(align_space, sizeof(*data->bps), GFP_KERNEL); if (!bps) { return -ENOMEM; @@ -2814,8 +2878,11 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, for (j = 0; j < count; j++) { if (amdgpu_ras_check_bad_page_unlock(con, - bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) { + data->count++; + data->space_left--; continue; + } if (!data->space_left && amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { @@ -2828,6 +2895,7 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, sizeof(struct eeprom_table_record)); data->count++; data->space_left--; + con->bad_page_num++; } return 0; @@ -2974,7 +3042,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, ret = __amdgpu_ras_convert_rec_array_from_rom(adev, &bps[i], &err_data, nps); if (ret) - control->ras_num_bad_pages -= adev->umc.retire_unit; + con->bad_page_num -= adev->umc.retire_unit; i += (adev->umc.retire_unit - 1); } else { break; @@ -2988,8 +3056,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, ret = __amdgpu_ras_convert_rec_from_rom(adev, &bps[i], &err_data, nps); if (ret) - control->ras_num_bad_pages -= adev->umc.retire_unit; + con->bad_page_num -= adev->umc.retire_unit; } + + con->eh_data->count_saved = con->eh_data->count; } else { ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages); } @@ -3012,7 +3082,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; struct amdgpu_ras_eeprom_control *control; - int save_count, unit_num, bad_page_num, i; + int save_count, unit_num, i; if (!con || !con->eh_data) { if (new_cnt) @@ -3033,27 +3103,26 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; - bad_page_num = control->ras_num_bad_pages; - save_count = data->count - bad_page_num; + unit_num = data->count / adev->umc.retire_unit - control->ras_num_recs; + save_count = con->bad_page_num - control->ras_num_bad_pages; mutex_unlock(&con->recovery_lock); - unit_num = save_count / adev->umc.retire_unit; if (new_cnt) *new_cnt = unit_num; /* only new entries are saved */ - if (save_count > 0) { + if (unit_num > 0) { /*old asics only save pa to eeprom like before*/ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) { if (amdgpu_ras_eeprom_append(control, - &data->bps[bad_page_num], save_count)) { + &data->bps[data->count_saved], unit_num)) { dev_err(adev->dev, "Failed to save EEPROM table data!"); return -EIO; } } else { for (i = 0; i < unit_num; i++) { if (amdgpu_ras_eeprom_append(control, - &data->bps[bad_page_num + + &data->bps[data->count_saved + i * adev->umc.retire_unit], 1)) { dev_err(adev->dev, "Failed to save EEPROM table data!"); return -EIO; @@ -3062,6 +3131,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, } dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); + data->count_saved = data->count; } return 0; @@ -3116,17 +3186,17 @@ 
static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) } } + ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true); + if (ret) + goto out; + ret = amdgpu_ras_eeprom_check(control); if (ret) goto out; /* HW not usable */ - if (amdgpu_ras_is_rma(adev)) { + if (amdgpu_ras_is_rma(adev)) ret = -EHWPOISON; - goto out; - } - - ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true); } out: @@ -3134,18 +3204,24 @@ out: return ret; } -static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, +static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, uint64_t addr) { struct ras_err_handler_data *data = con->eh_data; + struct amdgpu_device *adev = con->adev; int i; + if ((addr >= adev->gmc.mc_vram_size && + adev->gmc.mc_vram_size) || + (addr >= RAS_UMC_INJECT_ADDR_LIMIT)) + return -EINVAL; + addr >>= AMDGPU_GPU_PAGE_SHIFT; for (i = 0; i < data->count; i++) if (addr == data->bps[i].retired_page) - return true; + return 1; - return false; + return 0; } /* @@ -3153,11 +3229,11 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, * * Note: this check is only for umc block */ -static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, +static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - bool ret = false; + int ret = 0; if (!con || !con->eh_data) return ret; @@ -3241,7 +3317,7 @@ static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); ecc_log->de_queried_count = 0; - ecc_log->prev_de_queried_count = 0; + ecc_log->consumption_q_count = 0; } static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) @@ -3261,7 +3337,7 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) mutex_destroy(&ecc_log->lock); ecc_log->de_queried_count = 0; - ecc_log->prev_de_queried_count = 0; + ecc_log->consumption_q_count = 0; } static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con, @@ -3287,7 +3363,6 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) page_retirement_dwork.work); struct amdgpu_device *adev = con->adev; struct ras_err_data err_data; - unsigned long err_cnt; /* If gpu reset is ongoing, delay retiring the bad pages */ if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) { @@ -3299,13 +3374,9 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) amdgpu_ras_error_data_init(&err_data); amdgpu_umc_handle_bad_pages(adev, &err_data); - err_cnt = err_data.err_addr_cnt; amdgpu_ras_error_data_fini(&err_data); - if (err_cnt && amdgpu_ras_is_rma(adev)) - amdgpu_ras_reset_gpu(adev); - amdgpu_ras_schedule_retirement_dwork(con, AMDGPU_RAS_RETIRE_PAGE_INTERVAL); } @@ -3316,49 +3387,39 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, int ret = 0; struct ras_ecc_log_info *ecc_log; struct ras_query_if info; - uint32_t timeout = 0; + u32 timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - uint64_t de_queried_count; - uint32_t new_detect_count, total_detect_count; - uint32_t need_query_count = poison_creation_count; + u64 de_queried_count; + u64 consumption_q_count; enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION; memset(&info, 0, sizeof(info)); info.head.block = AMDGPU_RAS_BLOCK__UMC; ecc_log = &ras->umc_ecc_log; - total_detect_count = 0; + ecc_log->de_queried_count = 0; + ecc_log->consumption_q_count = 0; + do { ret = 
amdgpu_ras_query_error_status_with_event(adev, &info, type); if (ret) return ret; de_queried_count = ecc_log->de_queried_count; - if (de_queried_count > ecc_log->prev_de_queried_count) { - new_detect_count = de_queried_count - ecc_log->prev_de_queried_count; - ecc_log->prev_de_queried_count = de_queried_count; - timeout = 0; - } else { - new_detect_count = 0; - } + consumption_q_count = ecc_log->consumption_q_count; - if (new_detect_count) { - total_detect_count += new_detect_count; - } else { - if (!timeout && need_query_count) - timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC; + if (de_queried_count && consumption_q_count) + break; - if (timeout) { - if (!--timeout) - break; - msleep(1); - } - } - } while (total_detect_count < need_query_count); + msleep(100); + } while (--timeout); - if (total_detect_count) + if (de_queried_count) schedule_delayed_work(&ras->page_retirement_dwork, 0); + if (amdgpu_ras_is_rma(adev) && atomic_cmpxchg(&ras->rma_in_recovery, 0, 1) == 0) + amdgpu_ras_reset_gpu(adev); + return 0; } @@ -3394,6 +3455,12 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, reset_flags |= msg.reset; } + /* + * Try to ensure poison creation handler is completed first + * to set rma if bad page exceed threshold. + */ + flush_delayed_work(&con->page_retirement_dwork); + /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */ if (reset_flags && !amdgpu_ras_is_rma(adev)) { if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) @@ -3403,8 +3470,6 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, else reset = reset_flags; - flush_delayed_work(&con->page_retirement_dwork); - con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); @@ -3434,6 +3499,7 @@ static int amdgpu_ras_page_retirement_thread(void *param) if (kthread_should_stop()) break; + mutex_lock(&con->poison_lock); gpu_reset = 0; do { @@ -3446,7 +3512,8 @@ static int amdgpu_ras_page_retirement_thread(void *param) atomic_sub(poison_creation_count, &con->poison_creation_count); atomic_sub(poison_creation_count, &con->page_retirement_req_cnt); } - } while (atomic_read(&con->poison_creation_count)); + } while (atomic_read(&con->poison_creation_count) && + !atomic_read(&con->poison_consumption_count)); if (ret != -EIO) { msg_count = kfifo_len(&con->poison_fifo); @@ -3463,6 +3530,7 @@ static int amdgpu_ras_page_retirement_thread(void *param) /* gpu mode-1 reset is ongoing or just completed ras mode-1 reset */ /* Clear poison creation request */ atomic_set(&con->poison_creation_count, 0); + atomic_set(&con->poison_consumption_count, 0); /* Clear poison fifo */ amdgpu_ras_clear_poison_fifo(adev); @@ -3487,9 +3555,12 @@ static int amdgpu_ras_page_retirement_thread(void *param) atomic_sub(msg_count, &con->page_retirement_req_cnt); } + atomic_set(&con->poison_consumption_count, 0); + /* Wake up work to save bad pages to eeprom */ schedule_delayed_work(&con->page_retirement_dwork, 0); } + mutex_unlock(&con->poison_lock); } return 0; @@ -3570,8 +3641,10 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info) } mutex_init(&con->recovery_lock); + mutex_init(&con->poison_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); + atomic_set(&con->rma_in_recovery, 0); con->eeprom_control.bad_channel_bitmap = 0; max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control); @@ -3589,6 +3662,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info) 
init_waitqueue_head(&con->page_retirement_wq); atomic_set(&con->page_retirement_req_cnt, 0); atomic_set(&con->poison_creation_count, 0); + atomic_set(&con->poison_consumption_count, 0); con->page_retirement_thread = kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement"); if (IS_ERR(con->page_retirement_thread)) { @@ -3661,6 +3735,8 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) kfree(data); mutex_unlock(&con->recovery_lock); + amdgpu_ras_critical_region_init(adev); + return 0; } /* recovery end */ @@ -4087,6 +4163,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } + con->init_task_pid = task_pid_nr(current); + get_task_comm(con->init_task_comm, current); + + mutex_init(&con->critical_region_lock); + INIT_LIST_HEAD(&con->critical_region_head); + dev_info(adev->dev, "RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", adev->ras_hw_enabled, adev->ras_enabled); @@ -4366,6 +4448,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) if (!adev->ras_enabled || !con) return 0; + amdgpu_ras_critical_region_fini(adev); + mutex_destroy(&con->critical_region_lock); + list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) { if (ras_node->ras_obj) { obj = ras_node->ras_obj; @@ -5274,6 +5359,9 @@ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn) uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT; int ret = 0; + if (amdgpu_ras_check_critical_address(adev, start)) + return 0; + mutex_lock(&con->page_rsv_lock); ret = amdgpu_vram_mgr_query_page_status(mgr, start); if (ret == -ENOENT) @@ -5310,3 +5398,80 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev) return con->is_rma; } + +int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, + struct amdgpu_bo *bo) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_vram_mgr_resource *vres; + struct ras_critical_region *region; + struct drm_buddy_block *block; + int ret = 0; + + if (!bo || !bo->tbo.resource) + return -EINVAL; + + vres = to_amdgpu_vram_mgr_resource(bo->tbo.resource); + + mutex_lock(&con->critical_region_lock); + + /* Check if the bo had been recorded */ + list_for_each_entry(region, &con->critical_region_head, node) + if (region->bo == bo) + goto out; + + /* Record new critical amdgpu bo */ + list_for_each_entry(block, &vres->blocks, link) { + region = kzalloc(sizeof(*region), GFP_KERNEL); + if (!region) { + ret = -ENOMEM; + goto out; + } + region->bo = bo; + region->start = amdgpu_vram_mgr_block_start(block); + region->size = amdgpu_vram_mgr_block_size(block); + list_add_tail(®ion->node, &con->critical_region_head); + } + +out: + mutex_unlock(&con->critical_region_lock); + + return ret; +} + +static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev) +{ + amdgpu_ras_add_critical_region(adev, adev->mman.fw_reserved_memory); +} + +static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_critical_region *region, *tmp; + + mutex_lock(&con->critical_region_lock); + list_for_each_entry_safe(region, tmp, &con->critical_region_head, node) { + list_del(®ion->node); + kfree(region); + } + mutex_unlock(&con->critical_region_lock); +} + +bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_critical_region *region; + bool ret = false; + + mutex_lock(&con->critical_region_lock); + list_for_each_entry(region, 
&con->critical_region_head, node) { + if ((region->start <= addr) && + (addr < (region->start + region->size))) { + ret = true; + break; + } + } + mutex_unlock(&con->critical_region_lock); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 927d6bff734a..6cf0dfd38be8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -492,8 +492,15 @@ struct ras_ecc_err { struct ras_ecc_log_info { struct mutex lock; struct radix_tree_root de_page_tree; - uint64_t de_queried_count; - uint64_t prev_de_queried_count; + uint64_t de_queried_count; + uint64_t consumption_q_count; +}; + +struct ras_critical_region { + struct list_head node; + struct amdgpu_bo *bo; + uint64_t start; + uint64_t size; }; struct amdgpu_ras { @@ -515,6 +522,7 @@ struct amdgpu_ras { /* gpu recovery */ struct work_struct recovery_work; atomic_t in_recovery; + atomic_t rma_in_recovery; struct amdgpu_device *adev; /* error handler data */ struct ras_err_handler_data *eh_data; @@ -557,6 +565,7 @@ struct amdgpu_ras { struct mutex page_retirement_lock; atomic_t page_retirement_req_cnt; atomic_t poison_creation_count; + atomic_t poison_consumption_count; struct mutex page_rsv_lock; DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); struct ras_ecc_log_info umc_ecc_log; @@ -570,6 +579,17 @@ struct amdgpu_ras { struct ras_event_manager *event_mgr; uint64_t reserved_pages_in_bytes; + + pid_t init_task_pid; + char init_task_comm[TASK_COMM_LEN]; + + int bad_page_num; + + struct list_head critical_region_head; + struct mutex critical_region_lock; + + /* Protect poison injection */ + struct mutex poison_lock; }; struct ras_fs_data { @@ -608,6 +628,7 @@ struct ras_err_handler_data { struct eeprom_table_record *bps; /* the count of entries */ int count; + int count_saved; /* the space can place new entries */ int space_left; }; @@ -973,6 +994,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); +int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, struct amdgpu_bo *bo); +bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr); + int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 9bda9ad13f88..3eb3fb55ccb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -743,8 +743,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, else control->ras_num_mca_recs += num; - control->ras_num_bad_pages = control->ras_num_pa_recs + - control->ras_num_mca_recs * adev->umc.retire_unit; + control->ras_num_bad_pages = con->bad_page_num; Out: kfree(buf); return res; @@ -766,6 +765,10 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) dev_warn(adev->dev, "Saved bad pages %d reaches threshold value %d\n", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); + + if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev)) + dev_warn(adev->dev, "fail to generate bad page threshold cper records\n"); + if ((amdgpu_bad_page_threshold != -1) && (amdgpu_bad_page_threshold != -2)) { control->tbl_hdr.header = RAS_TABLE_HDR_BAD; @@ -774,9 +777,10 @@ 
amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) control->tbl_rai.health_percent = 0; } ras->is_rma = true; - /* ignore the -ENOTSUPP return value */ - amdgpu_dpm_send_rma_reason(adev); } + + /* ignore the -ENOTSUPP return value */ + amdgpu_dpm_send_rma_reason(adev); } if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) @@ -1457,8 +1461,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) if (!__get_eeprom_i2c_addr(adev, control)) return -EINVAL; - control->ras_num_bad_pages = control->ras_num_pa_recs + - control->ras_num_mca_recs * adev->umc.retire_unit; + control->ras_num_bad_pages = ras->bad_page_num; if (hdr->header == RAS_TABLE_HDR_VAL) { dev_dbg(adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index dabfbdf6f1ce..28c4ad62f50e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -340,6 +340,9 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf, case AMDGPU_RESET_SRC_USER: strscpy(buf, "user trigger", len); break; + case AMDGPU_RESET_SRC_USERQ: + strscpy(buf, "user queue trigger", len); + break; default: strscpy(buf, "unknown", len); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 4d9b9701139b..07b4d37f1db6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -43,6 +43,7 @@ enum AMDGPU_RESET_SRCS { AMDGPU_RESET_SRC_MES, AMDGPU_RESET_SRC_HWS, AMDGPU_RESET_SRC_USER, + AMDGPU_RESET_SRC_USERQ, }; struct amdgpu_reset_context { @@ -160,4 +161,16 @@ int amdgpu_reset_do_xgmi_reset_on_init( bool amdgpu_reset_in_recovery(struct amdgpu_device *adev); +static inline void amdgpu_reset_set_dpc_status(struct amdgpu_device *adev, + bool status) +{ + adev->pcie_reset_ctx.occurs_dpc = status; + adev->no_hw_access = status; +} + +static inline bool amdgpu_reset_in_dpc(struct amdgpu_device *adev) +{ + return adev->pcie_reset_ctx.occurs_dpc; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6379bb25bf5c..486c3646710c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -421,8 +421,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - - ring->adev->rings[ring->idx] = NULL; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index d45ebfb642ca..a0b479d5fff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -67,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { - u64 seq64_addr, va_flags; struct amdgpu_bo *bo; struct drm_exec exec; + u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -94,9 +94,9 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; - va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); - r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, - va_flags); + r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, + AMDGPU_VA_RESERVED_SEQ64_SIZE, + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); if (r) { DRM_ERROR("failed to do bo_map on 
userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 27ab4e754b2a..428265046815 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -226,7 +226,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_DELAYED, &job); + AMDGPU_IB_POOL_DELAYED, &job, + AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER); if (r) return r; @@ -406,7 +407,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct dma_fence *wipe_fence = NULL; r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, - false); + false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT); if (r) { goto error; } else if (wipe_fence) { @@ -1510,7 +1511,8 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, AMDGPU_IB_POOL_DELAYED, - &job); + &job, + AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA); if (r) goto out; @@ -2167,7 +2169,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, struct dma_resv *resv, bool vm_needs_flush, struct amdgpu_job **job, - bool delayed) + bool delayed, u64 k_job_id) { enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : @@ -2177,7 +2179,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, &adev->mman.high_pr; r = amdgpu_job_alloc_with_ib(adev, entity, AMDGPU_FENCE_OWNER_UNDEFINED, - num_dw * 4, pool, job); + num_dw * 4, pool, job, k_job_id); if (r) return r; @@ -2217,7 +2219,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, - resv, vm_needs_flush, &job, false); + resv, vm_needs_flush, &job, false, + AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER); if (r) return r; @@ -2252,7 +2255,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, uint64_t dst_addr, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, - bool vm_needs_flush, bool delayed) + bool vm_needs_flush, bool delayed, + u64 k_job_id) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2265,7 +2269,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, - &job, delayed); + &job, delayed, k_job_id); if (r) return r; @@ -2335,7 +2339,8 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, goto err; r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, - &next, true, true); + &next, true, true, + AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER); if (r) goto err; @@ -2354,7 +2359,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, struct dma_fence **f, - bool delayed) + bool delayed, + u64 k_job_id) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -2384,7 +2390,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, goto error; r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, - &next, true, delayed); + &next, true, delayed, k_job_id); if (r) goto error; diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 2309df3f68a9..d82d107fdcc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -182,7 +182,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, struct dma_fence **fence, - bool delayed); + bool delayed, + u64 k_job_id); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index c92b8794aa73..2e039fb778ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -252,6 +252,7 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, block, pasid, pasid_fn, data, reset); if (!ret) { atomic_inc(&con->page_retirement_req_cnt); + atomic_inc(&con->poison_consumption_count); wake_up(&con->page_retirement_wq); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index c3ace8030530..467e8fa6cb8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -364,7 +364,7 @@ static int amdgpu_mqd_info_read(struct seq_file *m, void *unused) return -EINVAL; } - seq_printf(m, "queue_type %d\n", queue->queue_type); + seq_printf(m, "queue_type: %d\n", queue->queue_type); seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj)); amdgpu_bo_unreserve(bo); @@ -471,6 +471,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) if (index == (uint64_t)-EINVAL) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); kfree(queue); + r = -EINVAL; goto unlock; } @@ -567,7 +568,6 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, args->in.queue_size || args->in.rptr_va || args->in.wptr_va || - args->in.wptr_va || args->in.mqd || args->in.mqd_size) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index b1ca91b7cda4..1bd84f4cce78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -78,6 +78,12 @@ struct amdgpu_userq_funcs { struct amdgpu_usermode_queue *queue); int (*map)(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue); + int (*preempt)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*restore)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*detect_and_reset)(struct amdgpu_device *adev, + int queue_type); }; /* Usermode queues for gfx */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index c2a983ff23c9..95e91d1dc58a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -67,6 +67,14 @@ static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv) return le64_to_cpu(*fence_drv->cpu_addr); } +static void +amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv, + u64 seq) +{ + if (fence_drv->cpu_addr) + *fence_drv->cpu_addr = cpu_to_le64(seq); +} + int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq) { @@ -408,6 +416,40 @@ static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) dma_fence_put(fence); } +static void +amdgpu_userq_fence_driver_set_error(struct 
amdgpu_userq_fence *fence, + int error) +{ + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; + unsigned long flags; + struct dma_fence *f; + + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); + + f = rcu_dereference_protected(&fence->base, + lockdep_is_held(&fence_drv->fence_list_lock)); + if (f && !dma_fence_is_signaled_locked(f)) + dma_fence_set_error(f, error); + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); +} + +void +amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq) +{ + struct dma_fence *f = userq->last_fence; + + if (f) { + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; + u64 wptr = fence->base.seqno; + + amdgpu_userq_fence_driver_set_error(fence, -ECANCELED); + amdgpu_userq_fence_write(fence_drv, wptr); + amdgpu_userq_fence_driver_process(fence_drv); + + } +} + int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index 97a125ab8a78..d76add2afc77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -67,6 +67,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_destroy(struct kref *ref); int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 74758b5ffc6c..5c38f0d30c87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1136,7 +1136,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity, AMDGPU_FENCE_OWNER_UNDEFINED, 64, direct ? AMDGPU_IB_POOL_DIRECT : - AMDGPU_IB_POOL_DELAYED, &job); + AMDGPU_IB_POOL_DELAYED, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b9060bcd4806..ce318f5de047 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -449,7 +449,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, AMDGPU_FENCE_OWNER_UNDEFINED, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -540,7 +540,8 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, AMDGPU_FENCE_OWNER_UNDEFINED, ib_size_dw * 4, direct ? 
AMDGPU_IB_POOL_DIRECT : - AMDGPU_IB_POOL_DELAYED, &job); + AMDGPU_IB_POOL_DELAYED, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f1f67521c29c..595f0df17bcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -92,6 +92,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); +static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev); int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i) { @@ -184,16 +185,16 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf; vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf; dev_info(adev->dev, - "Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n", - enc_major, enc_minor, dec_ver, vep, fw_rev); + "[VCN instance %d] Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n", + i, enc_major, enc_minor, dec_ver, vep, fw_rev); } else { unsigned int version_major, version_minor, family_id; family_id = le32_to_cpu(hdr->ucode_version) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - dev_info(adev->dev, "Found VCN firmware Version: %u.%u Family ID: %u\n", - version_major, version_minor, family_id); + dev_info(adev->dev, "[VCN instance %d] Found VCN firmware Version: %u.%u Family ID: %u\n", + i, version_major, version_minor, family_id); } bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; @@ -285,6 +286,10 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) amdgpu_ucode_release(&adev->vcn.inst[0].fw); adev->vcn.inst[i].fw = NULL; } + + if (adev->vcn.reg_list) + amdgpu_vcn_reg_dump_fini(adev); + mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock); mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround); @@ -352,8 +357,6 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) if (adev->vcn.harvest_config & (1 << i)) return 0; - cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); - /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset) @@ -405,6 +408,54 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev, int i) return 0; } +void amdgpu_vcn_get_profile(struct amdgpu_device *adev) +{ + int r; + + mutex_lock(&adev->vcn.workload_profile_mutex); + + if (adev->vcn.workload_profile_active) { + mutex_unlock(&adev->vcn.workload_profile_mutex); + return; + } + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + true); + if (r) + dev_warn(adev->dev, + "(%d) failed to enable video power profile mode\n", r); + else + adev->vcn.workload_profile_active = true; + mutex_unlock(&adev->vcn.workload_profile_mutex); +} + +void amdgpu_vcn_put_profile(struct amdgpu_device *adev) +{ + bool pg = true; + int r, i; + + mutex_lock(&adev->vcn.workload_profile_mutex); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.inst[i].cur_state != AMD_PG_STATE_GATE) { + pg = false; + break; + } + } + + if (pg) { + r = amdgpu_dpm_switch_power_profile( + adev, PP_SMC_POWER_PROFILE_VIDEO, false); + if (r) + dev_warn( + adev->dev, + "(%d) failed to disable video power profile mode\n", + r); + else + adev->vcn.workload_profile_active = false; + } + + 
mutex_unlock(&adev->vcn.workload_profile_mutex); +} + static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_vcn_inst *vcn_inst = @@ -412,7 +463,6 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) struct amdgpu_device *adev = vcn_inst->adev; unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; unsigned int i = vcn_inst->inst, j; - int r = 0; if (adev->vcn.harvest_config & (1 << i)) return; @@ -438,16 +488,11 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[i]; if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { + mutex_lock(&vcn_inst->vcn_pg_lock); vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); - mutex_lock(&adev->vcn.workload_profile_mutex); - if (adev->vcn.workload_profile_active) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); - adev->vcn.workload_profile_active = false; - } - mutex_unlock(&adev->vcn.workload_profile_mutex); + mutex_unlock(&vcn_inst->vcn_pg_lock); + amdgpu_vcn_put_profile(adev); + } else { schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); } @@ -457,30 +502,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me]; - int r = 0; atomic_inc(&vcn_inst->total_submission_cnt); cancel_delayed_work_sync(&vcn_inst->idle_work); - /* We can safely return early here because we've cancelled the - * the delayed work so there is no one else to set it to false - * and we don't care if someone else sets it to true. - */ - if (adev->vcn.workload_profile_active) - goto pg_lock; - - mutex_lock(&adev->vcn.workload_profile_mutex); - if (!adev->vcn.workload_profile_active) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); - if (r) - dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); - adev->vcn.workload_profile_active = true; - } - mutex_unlock(&adev->vcn.workload_profile_mutex); - -pg_lock: mutex_lock(&vcn_inst->vcn_pg_lock); vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); @@ -508,6 +534,7 @@ pg_lock: vcn_inst->pause_dpg_mode(vcn_inst, &new_state); } mutex_unlock(&vcn_inst->vcn_pg_lock); + amdgpu_vcn_get_profile(adev); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) @@ -601,7 +628,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, 64, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) goto err; @@ -781,7 +808,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) goto err; @@ -911,7 +938,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -978,7 +1005,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -1527,3 +1554,86 @@ int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, return 
amdgpu_vcn_reset_engine(adev, ring->me); } + +int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev, + const struct amdgpu_hwip_reg_entry *reg, u32 count) +{ + adev->vcn.ip_dump = kcalloc(adev->vcn.num_vcn_inst * count, + sizeof(uint32_t), GFP_KERNEL); + if (!adev->vcn.ip_dump) + return -ENOMEM; + adev->vcn.reg_list = reg; + adev->vcn.reg_count = count; + + return 0; +} + +static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev) +{ + kfree(adev->vcn.ip_dump); + adev->vcn.ip_dump = NULL; + adev->vcn.reg_list = NULL; + adev->vcn.reg_count = 0; +} + +void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + u32 inst_off; + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * adev->vcn.reg_count; + /* mmUVD_POWER_STATUS is always readable and is the first in reg_list */ + adev->vcn.ip_dump[inst_off] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[0], i)); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) != + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + + if (is_powered) + for (j = 1; j < adev->vcn.reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[j], i)); + } +} + +void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + u32 inst_off; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * adev->vcn.reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) != + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < adev->vcn.reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", adev->vcn.reg_list[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 0bc0a94d7cf0..6d9acd36041d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -237,6 +237,8 @@ #define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2 +struct amdgpu_hwip_reg_entry; + enum amdgpu_vcn_caps { AMDGPU_VCN_RRMT_ENABLED, }; @@ -362,6 +364,8 @@ struct amdgpu_vcn { bool workload_profile_active; struct mutex workload_profile_mutex; + u32 reg_count; + const struct amdgpu_hwip_reg_entry *reg_list; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -557,4 +561,11 @@ int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *guilty_fence); +int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev, + const struct amdgpu_hwip_reg_entry *reg, u32 count); +void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block); +void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p); +void amdgpu_vcn_get_profile(struct amdgpu_device *adev); +void amdgpu_vcn_put_profile(struct amdgpu_device *adev); + #endif diff --git 
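/*
 * Editor's sketch, not part of the patch: the common reg-dump helpers above
 * replace per-version ip_dump bookkeeping.  A VCN version file would hand
 * its static register list over once during sw_init and point its IP funcs
 * at the shared dump/print routines; amdgpu_vcn_sw_fini() then frees the
 * storage via amdgpu_vcn_reg_dump_fini() whenever reg_list is set.  The
 * vcn_reg_list_x_y and _sketch names below are placeholders, not
 * identifiers from this patch:
 */
static int vcn_vx_y_sw_init_sketch(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	/* allocates num_vcn_inst * count dwords of dump storage */
	return amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_x_y,
					ARRAY_SIZE(vcn_reg_list_x_y));
}

static const struct amd_ip_funcs vcn_vx_y_ip_funcs_sketch = {
	/* ... the usual callbacks ... */
	.dump_ip_state = amdgpu_vcn_dump_ip_state,
	.print_ip_state = amdgpu_vcn_print_ip_state,
};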
a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 3da3ebb1d9a1..58accf2259b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -267,7 +267,8 @@ struct amdgpu_virt { struct amdgpu_irq_src rcv_irq; struct work_struct flr_work; - struct work_struct bad_pages_work; + struct work_struct req_bad_pages_work; + struct work_struct handle_bad_pages_work; struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0b87798daebd..dbda3a38a2b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -977,7 +977,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(¶ms, NULL); + r = vm->update_funcs->prepare(¶ms, NULL, + AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES); if (r) goto error; @@ -1146,7 +1147,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, dma_fence_put(tmp); } - r = vm->update_funcs->prepare(¶ms, sync); + r = vm->update_funcs->prepare(¶ms, sync, + AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE); if (r) goto error_free; @@ -1283,7 +1285,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct drm_gem_object *obj = &bo->tbo.base; if (drm_gem_is_imported(obj) && bo_va->is_xgmi) { - struct dma_buf *dma_buf = obj->dma_buf; + struct dma_buf *dma_buf = obj->import_attach->dmabuf; struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); @@ -1339,13 +1341,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here * but in case of something, we filter the flags in first place */ - if (!(mapping->flags & AMDGPU_PTE_READABLE)) + if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE)) update_flags &= ~AMDGPU_PTE_READABLE; - if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) + if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE)) update_flags &= ~AMDGPU_PTE_WRITEABLE; /* Apply ASIC specific mapping flags */ - amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags); + amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags, + &update_flags); trace_amdgpu_vm_bo_update(mapping); @@ -1486,7 +1489,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, struct dma_fence *fence) { - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev)) + if (mapping->flags & AMDGPU_VM_PAGE_PRT) amdgpu_vm_add_prt_cb(adev, fence); kfree(mapping); } @@ -1765,7 +1768,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, list_add(&mapping->list, &bo_va->invalids); amdgpu_vm_it_insert(mapping, &vm->va); - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev)) + if (mapping->flags & AMDGPU_VM_PAGE_PRT) amdgpu_vm_prt_get(adev); if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved) @@ -1825,7 +1828,7 @@ static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t saddr, uint64_t offset, - uint64_t size, uint64_t flags) + uint64_t size, uint32_t flags) { struct amdgpu_bo_va_mapping *mapping, *tmp; struct amdgpu_bo *bo = bo_va->base.bo; @@ -1884,7 +1887,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t saddr, 
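/*
 * Editor's sketch, not part of the patch: after the hunks above, a bo_va
 * mapping stores the 32-bit AMDGPU_VM_PAGE_* and AMDGPU_VM_MTYPE_* request
 * flags and each GMC's get_vm_pte() turns them into hardware PTE bits, so
 * callers pass the GEM VA flag space directly.  Address and size below are
 * illustrative only:
 */
static int vm_map_flags_sketch(struct amdgpu_device *adev,
			       struct amdgpu_bo_va *bo_va)
{
	uint32_t flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
			 AMDGPU_VM_MTYPE_NC;

	return amdgpu_vm_bo_map(adev, bo_va, 0x100000, 0, 0x10000, flags);
}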
uint64_t offset, - uint64_t size, uint64_t flags) + uint64_t size, uint32_t flags) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo = bo_va->base.bo; @@ -2741,7 +2744,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev) && prt_fini_needed) { + if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) { amdgpu_vm_prt_fini(adev, vm); prt_fini_needed = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fd086efd8457..67eaf5402e7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -308,7 +308,7 @@ struct amdgpu_vm_update_params { struct amdgpu_vm_update_funcs { int (*map_table)(struct amdgpu_bo_vm *bo); int (*prepare)(struct amdgpu_vm_update_params *p, - struct amdgpu_sync *sync); + struct amdgpu_sync *sync, u64 k_job_id); int (*update)(struct amdgpu_vm_update_params *p, struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); @@ -538,11 +538,11 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr, uint64_t offset, - uint64_t size, uint64_t flags); + uint64_t size, uint32_t flags); int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr, uint64_t offset, - uint64_t size, uint64_t flags); + uint64_t size, uint32_t flags); int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr); @@ -670,4 +670,9 @@ void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, void amdgpu_vm_print_task_info(struct amdgpu_device *adev, struct amdgpu_task_info *task_info); +#define amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) \ + list_for_each_entry(mapping, &(bo_va)->valids, list) +#define amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) \ + list_for_each_entry(mapping, &(bo_va)->invalids, list) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 0c1ef5850a5e..22e2e5b47341 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -40,12 +40,14 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table) * * @p: see amdgpu_vm_update_params definition * @sync: sync obj with fences to wait on + * @k_job_id: the id for tracing/debug purposes * * Returns: * Negativ errno, 0 for success. 
*/ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, - struct amdgpu_sync *sync) + struct amdgpu_sync *sync, + u64 k_job_id) { if (!sync) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 30022123b0bf..f794fb1cc06e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_vm.h" +#include "amdgpu_job.h" /* * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt @@ -395,7 +396,8 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(¶ms, NULL); + r = vm->update_funcs->prepare(¶ms, NULL, + AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR); if (r) goto exit; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 46d9fb433ab2..36805dcfa159 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -40,7 +40,7 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) /* Allocate a new job for @count PTE updates */ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, - unsigned int count) + unsigned int count, u64 k_job_id) { enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED; @@ -56,7 +56,7 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM, - ndw * 4, pool, &p->job); + ndw * 4, pool, &p->job, k_job_id); if (r) return r; @@ -69,16 +69,17 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, * * @p: see amdgpu_vm_update_params definition * @sync: amdgpu_sync object with fences to wait for + * @k_job_id: identifier of the job, for tracing purpose * * Returns: * Negativ errno, 0 for success. 
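/*
 * Editor's sketch, not part of the patch: every page-table update entry
 * point now labels itself with an AMDGPU_KERNEL_JOB_ID_* constant which the
 * backends forward down to amdgpu_job_alloc_with_ib() for tracing.
 * Condensed flow, error handling trimmed:
 */
static int vm_prepare_tracing_sketch(struct amdgpu_vm_update_params *p,
				     struct amdgpu_sync *sync)
{
	/* the caller picks the id, e.g. amdgpu_vm_pt_clear() ... */
	u64 k_job_id = AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR;

	/* ... prepare() passes it through, and the SDMA backend finally
	 * hands it to amdgpu_job_alloc_with_ib() when allocating the job
	 */
	return p->vm->update_funcs->prepare(p, sync, k_job_id);
}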
*/ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, - struct amdgpu_sync *sync) + struct amdgpu_sync *sync, u64 k_job_id) { int r; - r = amdgpu_vm_sdma_alloc_job(p, 0); + r = amdgpu_vm_sdma_alloc_job(p, 0, k_job_id); if (r) return r; @@ -249,7 +250,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (r) return r; - r = amdgpu_vm_sdma_alloc_job(p, count); + r = amdgpu_vm_sdma_alloc_job(p, count, + AMDGPU_KERNEL_JOB_ID_VM_UPDATE); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 121ee17b522b..474bfe36c0c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -379,9 +379,10 @@ static int vpe_sw_init(struct amdgpu_ip_block *ip_block) if (ret) goto out; - /* TODO: Add queue reset mask when FW fully supports it */ adev->vpe.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vpe.ring); + if (!amdgpu_sriov_vf(adev)) + adev->vpe.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; ret = amdgpu_vpe_sysfs_reset_mask_init(adev); if (ret) goto out; @@ -435,6 +436,8 @@ static int vpe_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; + cancel_delayed_work_sync(&adev->vpe.idle_work); + vpe_ring_stop(vpe); /* Power off VPE */ @@ -445,10 +448,6 @@ static int vpe_hw_fini(struct amdgpu_ip_block *ip_block) static int vpe_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = ip_block->adev; - - cancel_delayed_work_sync(&adev->vpe.idle_work); - return vpe_hw_fini(ip_block); } @@ -874,6 +873,27 @@ static void vpe_ring_end_use(struct amdgpu_ring *ring) schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); } +static int vpe_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_GATE); + if (r) + return r; + r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static ssize_t amdgpu_get_vpe_reset_mask(struct device *dev, struct device_attribute *attr, char *buf) @@ -942,6 +962,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .preempt_ib = vpe_ring_preempt_ib, .begin_use = vpe_ring_begin_use, .end_use = vpe_ring_end_use, + .reset = vpe_ring_reset, }; static void vpe_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 78f9e86ccc09..e69db0a93378 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -396,6 +396,35 @@ out: return ret; } +int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr, + uint64_t address, struct amdgpu_vram_block_info *info) +{ + struct amdgpu_vram_mgr_resource *vres; + struct drm_buddy_block *block; + u64 start, size; + int ret = -ENOENT; + + mutex_lock(&mgr->lock); + list_for_each_entry(vres, &mgr->allocated_vres_list, vres_node) { + list_for_each_entry(block, &vres->blocks, link) { + start = amdgpu_vram_mgr_block_start(block); + size = amdgpu_vram_mgr_block_size(block); + if ((start <= address) && (address < (start + size))) { + info->start = start; + info->size = size; + 
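/*
 * Editor's sketch, not part of the patch: vpe_ring_reset() above follows a
 * simple per-queue reset recipe - quiesce the ring through the generic reset
 * helpers and bounce the block through a full powergate cycle to bring it
 * back to a clean state.  Skeleton of that recipe:
 */
static int vpe_queue_reset_skeleton(struct amdgpu_ring *ring,
				    struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	amdgpu_ring_reset_helper_begin(ring, timedout_fence);

	r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
						   AMD_PG_STATE_GATE);
	if (!r)
		r = amdgpu_device_ip_set_powergating_state(adev,
							   AMD_IP_BLOCK_TYPE_VPE,
							   AMD_PG_STATE_UNGATE);

	return r ? r : amdgpu_ring_reset_helper_end(ring, timedout_fence);
}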
memcpy(&info->task, &vres->task, sizeof(vres->task)); + ret = 0; + goto out; + } + } + } + +out: + mutex_unlock(&mgr->lock); + + return ret; +} + static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { @@ -568,6 +597,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, remaining_size -= size; } + vres->task.pid = task_pid_nr(current); + get_task_comm(vres->task.comm, current); + list_add_tail(&vres->vres_node, &mgr->allocated_vres_list); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { struct drm_buddy_block *dcc_block; unsigned long dcc_start; @@ -645,6 +678,10 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, uint64_t vis_usage = 0; mutex_lock(&mgr->lock); + + list_del(&vres->vres_node); + memset(&vres->task, 0, sizeof(vres->task)); + list_for_each_entry(block, &vres->blocks, link) vis_usage += amdgpu_vram_mgr_vis_size(adev, block); @@ -933,6 +970,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) mutex_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); + INIT_LIST_HEAD(&mgr->allocated_vres_list); mgr->default_page_size = PAGE_SIZE; if (!adev->gmc.is_app_apu) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 2c88d5fd87da..5f5fd9a911c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -35,12 +35,26 @@ struct amdgpu_vram_mgr { struct list_head reserved_pages; atomic64_t vis_usage; u64 default_page_size; + struct list_head allocated_vres_list; +}; + +struct amdgpu_vres_task { + pid_t pid; + char comm[TASK_COMM_LEN]; +}; + +struct amdgpu_vram_block_info { + u64 start; + u64 size; + struct amdgpu_vres_task task; }; struct amdgpu_vram_mgr_resource { struct ttm_resource base; struct list_head blocks; unsigned long flags; + struct list_head vres_node; + struct amdgpu_vres_task task; }; static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) @@ -72,4 +86,7 @@ static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) ares->flags |= DRM_BUDDY_CLEARED; } +int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr, + uint64_t address, struct amdgpu_vram_block_info *info); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index c417f8689220..1083db8cea2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -120,6 +120,25 @@ static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, xcp->valid = true; } +static void __amdgpu_xcp_set_unique_id(struct amdgpu_xcp_mgr *xcp_mgr, + int xcp_id) +{ + struct amdgpu_xcp *xcp = &xcp_mgr->xcp[xcp_id]; + struct amdgpu_device *adev = xcp_mgr->adev; + uint32_t inst_mask; + uint64_t uid; + int i; + + if (!amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask) && + inst_mask) { + i = GET_INST(GC, (ffs(inst_mask) - 1)); + uid = amdgpu_device_get_uid(xcp_mgr->adev->uid_info, + AMDGPU_UID_TYPE_XCD, i); + if (uid) + xcp->unique_id = uid; + } +} + int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) { struct amdgpu_device *adev = xcp_mgr->adev; @@ -158,6 +177,7 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) else xcp_mgr->xcp[i].mem_id = mem_id; } + __amdgpu_xcp_set_unique_id(xcp_mgr, i); } xcp_mgr->num_xcps = num_xcps; @@ -406,6 +426,7 @@ void 
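/*
 * Editor's sketch, not part of the patch: VRAM allocations are now kept on
 * allocated_vres_list together with the owning pid/comm, so a hypothetical
 * consumer (e.g. a RAS or page-retirement path, not shown in this diff) can
 * resolve a faulting VRAM address back to its owner:
 */
static void vram_owner_lookup_sketch(struct amdgpu_device *adev, u64 addr)
{
	struct amdgpu_vram_block_info info;

	if (!amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr,
						      addr, &info))
		dev_info(adev->dev,
			 "VRAM 0x%llx: block [0x%llx, +0x%llx) owned by %s (pid %d)\n",
			 addr, info.start, info.size, info.task.comm,
			 info.task.pid);
}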
amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev; p_ddev->driver = adev->xcp_mgr->xcp[i].driver; p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager; + amdgpu_xcp_drm_dev_free(p_ddev); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 70a0f8400b57..1928d9e224fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -112,6 +112,7 @@ struct amdgpu_xcp { struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; struct amdgpu_xcp_mgr *xcp_mgr; struct kobject kobj; + uint64_t unique_id; }; struct amdgpu_xcp_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 427b073de2fc..1c994d0cc50b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1494,6 +1494,27 @@ static void atom_get_vbios_version(struct atom_context *ctx) } } +static void atom_get_vbios_build(struct atom_context *ctx) +{ + unsigned char *atom_rom_hdr; + unsigned char *str; + uint16_t base; + + base = CU16(ATOM_ROM_TABLE_PTR); + atom_rom_hdr = CSTR(base); + + str = CSTR(CU16(base + ATOM_ROM_CFG_PTR)); + /* Skip config string */ + while (str < atom_rom_hdr && *str++) + ; + /* Skip change list string */ + while (str < atom_rom_hdr && *str++) + ; + + if ((str + STRLEN_NORMAL) < atom_rom_hdr) + strscpy(ctx->build_num, str, STRLEN_NORMAL); +} + struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) { int base; @@ -1554,6 +1575,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) atom_get_vbios_pn(ctx); atom_get_vbios_date(ctx); atom_get_vbios_version(ctx); + atom_get_vbios_build(ctx); return ctx; } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index b807f6639a4c..825ff28731f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -37,6 +37,7 @@ struct drm_device; #define ATOM_ROM_MAGIC "ATOM" #define ATOM_ROM_MAGIC_PTR 4 +#define ATOM_ROM_CFG_PTR 0xC #define ATOM_ROM_MSG_PTR 0x10 #define ATOM_ROM_CMD_PTR 0x1E #define ATOM_ROM_DATA_PTR 0x20 @@ -151,6 +152,7 @@ struct atom_context { uint32_t version; uint8_t vbios_ver_str[STRLEN_NORMAL]; uint8_t date[STRLEN_NORMAL]; + uint8_t build_num[STRLEN_NORMAL]; }; extern int amdgpu_atom_debug; diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c new file mode 100644 index 000000000000..96616a865aac --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
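/*
 * Editor's sketch, not part of the patch: atom_get_vbios_build() above
 * follows the ROM config pointer and keeps the string that comes after the
 * config and change-list strings as the build number.  A hypothetical
 * consumer just reads the cached copy after amdgpu_atom_parse():
 */
static void vbios_build_report_sketch(struct amdgpu_device *adev)
{
	struct atom_context *ctx = adev->mode_info.atom_context;

	if (ctx && ctx->build_num[0])
		dev_info(adev->dev, "VBIOS build: %s\n",
			 (const char *)ctx->build_num);
}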
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "cyan_skillfish_ip_offset.h" + +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocke needed by driver */ + uint32_t i; + + adev->gfx.xcc_mask = 1; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index bf7c22f81cda..72ca6538b2e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1141,8 +1141,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } @@ -1462,17 +1461,12 @@ static int dce_v10_0_audio_init(struct amdgpu_device *adev) static void dce_v10_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 47e05783c4a0..e84608891300 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1173,8 +1173,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of 
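/*
 * Editor's sketch, not part of the patch: cyan_skillfish_reg_base_init()
 * above only fills adev->reg_offset[] for the IP blocks the driver touches.
 * Those tables are what the SOC15 register accessors index, roughly
 * (simplified; the real macros pick a base segment per register):
 */
static u32 reg_addr_lookup_sketch(struct amdgpu_device *adev, u32 seg, u32 reg)
{
	/* e.g. GC instance 0: base of segment 'seg' plus register offset */
	return adev->reg_offset[GC_HWIP][0][seg] + reg;
}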
lines the linebuffer leads before the scanout */ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } @@ -1511,17 +1510,12 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev) static void dce_v11_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 276c025c4c03..acc887a58518 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1034,7 +1034,6 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; /* Save number of lines the linebuffer leads before the scanout */ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; @@ -1451,17 +1450,12 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev) static void dce_v6_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index e62ccf9eb73d..2ccd6aad8dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1096,8 +1096,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } @@ -1443,17 +1442,12 @@ static int dce_v8_0_audio_init(struct amdgpu_device *adev) static void dce_v8_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 7bd506f06eb1..264183ab24ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7668,19 +7668,17 @@ static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block) /* Disable MEC parsing/prefetching */ gfx_v10_0_cp_compute_enable(adev, false); - if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - } + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = 
RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); /* Wait a little for things to settle down */ udelay(50); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c01c241a1b06..3d9c045a8a64 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1612,9 +1612,9 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): if (!adev->gfx.disable_uq && - adev->gfx.me_fw_version >= 2390 && - adev->gfx.pfp_fw_version >= 2530 && - adev->gfx.mec_fw_version >= 2600 && + adev->gfx.me_fw_version >= 2420 && + adev->gfx.pfp_fw_version >= 2580 && + adev->gfx.mec_fw_version >= 2650 && adev->mes.fw_version[0] >= 120) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; @@ -4129,6 +4129,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #endif if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4281,8 +4283,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; @@ -4639,8 +4643,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
- false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->gfxhub.funcs->set_fault_enable_default(adev, value); /* TODO investigate why this and the hdp flush above is needed, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 3e138527d534..5dbc5dbc694a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3026,6 +3026,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #endif if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -3175,8 +3177,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; @@ -3520,8 +3524,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? - false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->gfxhub.funcs->set_fault_enable_default(adev, value); /* TODO investigate why this and the hdp flush above is needed, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 20b30f4b3c7d..a6ff9a137a83 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2650,6 +2650,9 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) !READ_ONCE(adev->barrier_has_auto_waitcnt)); WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); break; + case IP_VERSION(9, 4, 2): + gfx_v9_4_2_init_sq(adev); + break; default: break; } @@ -4172,19 +4175,17 @@ static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) /* Disable MEC parsing/prefetching */ gfx_v9_0_cp_compute_enable(adev, false); - if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - udelay(50); + udelay(50); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - } + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); /* Wait a little for things to settle down */ udelay(50); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index c48cd47b531f..8058ea91ecaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -748,6 +748,18 @@ void gfx_v9_4_2_init_golden_registers(struct 
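/*
 * Editor's sketch, not part of the patch: the gfx11/gfx12 MQD hunks above
 * tie queue privilege to queue ownership.  Summarized with the same register
 * fields (the helper itself is illustrative, not from the patch):
 */
static u32 hqd_priv_bits_sketch(u32 tmp, bool kernel_queue, bool gfx_queue)
{
	if (gfx_queue) {
		/* user-mode GFX queues get a non-privileged ring buffer */
		if (!kernel_queue)
			tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
	} else if (kernel_queue) {
		/* only kernel compute queues keep the privileged/KMD marks */
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	}
	return tmp;
}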
amdgpu_device *adev, } } +void gfx_v9_4_2_init_sq(struct amdgpu_device *adev) +{ + uint32_t data; + + if (adev->gfx.mec_fw_version >= 98) { + adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; + data = RREG32_SOC15(GC, 0, regSQ_CONFIG1); + data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1); + WREG32_SOC15(GC, 0, regSQ_CONFIG1, data); + } +} + void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, uint32_t first_vmid, uint32_t last_vmid) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h index 7584624b641c..a603724c1dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h @@ -28,6 +28,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, uint32_t first_vmid, uint32_t last_vmid); void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, uint32_t die_id); +void gfx_v9_4_2_init_sq(struct amdgpu_device *adev); void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev); int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 51babf5c78c8..8ba66d4dfe86 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2461,19 +2461,17 @@ static int gfx_v9_4_3_soft_reset(struct amdgpu_ip_block *ip_block) /* Disable MEC parsing/prefetching */ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0); - if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); - } + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); /* Wait a little for things to settle down */ udelay(50); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 7923f491cf73..d7499be8c4bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -466,24 +466,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int * 0 valid */ -static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) -{ - switch (flags) { - case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC); - case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC); - case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC); - default: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - } -} - static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { @@ -508,21 +490,39 @@ static void gmc_v10_0_get_vm_pde(struct 
amdgpu_device *adev, int level, } static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - struct amdgpu_bo *bo = mapping->bo_va->base.bo; - - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_CC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC); + break; + } - *flags &= ~AMDGPU_PTE_NOALLOC; - *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC); + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC) + *flags |= AMDGPU_PTE_NOALLOC; + else + *flags &= ~AMDGPU_PTE_NOALLOC; - if (mapping->flags & AMDGPU_PTE_PRT) { + if (vm_flags & AMDGPU_VM_PAGE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags |= AMDGPU_PTE_SNOOPED; *flags |= AMDGPU_PTE_LOG; @@ -563,7 +563,6 @@ static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, - .map_mtype = gmc_v10_0_map_mtype, .get_vm_pde = gmc_v10_0_get_vm_pde, .get_vm_pte = gmc_v10_0_get_vm_pte, .get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size, @@ -964,8 +963,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) /* Flush HDP after it is initialized */ amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
- false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; if (!adev->in_s0ix) adev->gfxhub.funcs->set_fault_enable_default(adev, value); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index f15d691e9a20..7bc389d9f5c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -430,24 +430,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int * 0 valid */ -static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) -{ - switch (flags) { - case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC); - case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC); - case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC); - default: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - } -} - static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { @@ -472,21 +454,39 @@ static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - struct amdgpu_bo *bo = mapping->bo_va->base.bo; - - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_CC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC); + break; + } - *flags &= ~AMDGPU_PTE_NOALLOC; - *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC); + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC) + *flags |= AMDGPU_PTE_NOALLOC; + else + *flags &= ~AMDGPU_PTE_NOALLOC; - if (mapping->flags & AMDGPU_PTE_PRT) { + if (vm_flags & AMDGPU_VM_PAGE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags |= AMDGPU_PTE_SNOOPED; *flags |= AMDGPU_PTE_LOG; @@ -527,7 +527,6 @@ static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = { .flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping, - .map_mtype = gmc_v11_0_map_mtype, .get_vm_pde = gmc_v11_0_get_vm_pde, .get_vm_pte = gmc_v11_0_get_vm_pte, .get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size, @@ -906,8 +905,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) /* Flush HDP after it is initialized */ amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
- false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->mmhub.funcs->set_fault_enable_default(adev, value); gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index de763105fdfd..76d3c40735b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -336,6 +336,22 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t queried; int vmid, i; + if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready && + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) { + struct mes_inv_tlbs_pasid_input input = {0}; + input.pasid = pasid; + input.flush_type = flush_type; + input.hub_id = AMDGPU_GFXHUB(0); + /* MES will invalidate all gc_hub for the device from master */ + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); + if (all_hub) { + /* Only need to invalidate mm_hub now, gfx12 only support one mmhub */ + input.hub_id = AMDGPU_MMHUB0(0); + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); + } + return; + } + for (vmid = 1; vmid < 16; vmid++) { bool valid; @@ -453,20 +469,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid * 0 valid */ -static uint64_t gmc_v12_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) -{ - switch (flags) { - case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC); - default: - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); - } -} - static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { @@ -490,19 +492,35 @@ static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - struct amdgpu_bo *bo = mapping->bo_va->base.bo; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_UC: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); + break; + } - *flags &= ~AMDGPU_PTE_MTYPE_GFX12_MASK; - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_GFX12_MASK); + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC) + *flags |= AMDGPU_PTE_NOALLOC; + else + *flags &= ~AMDGPU_PTE_NOALLOC; - if (mapping->flags & AMDGPU_PTE_PRT_GFX12) { - *flags |= AMDGPU_PTE_PRT_GFX12; + if (vm_flags & AMDGPU_VM_PAGE_PRT) { *flags |= AMDGPU_PTE_SNOOPED; *flags |= AMDGPU_PTE_SYSTEM; *flags |= AMDGPU_PTE_IS_PTE; @@ -543,7 +561,6 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v12_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v12_0_emit_pasid_mapping, - .map_mtype = gmc_v12_0_map_mtype, .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, @@ -876,8 +893,7 @@ 
static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) /* Flush HDP after it is initialized */ amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? - false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->mmhub.funcs->set_fault_enable_default(adev, value); gmc_v12_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 8030fcd64210..f6ad7911f1e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -382,7 +382,9 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { *flags &= ~AMDGPU_PTE_EXECUTABLE; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index a8d5795084fc..93d7ccb7d013 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -504,7 +504,9 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { *flags &= ~AMDGPU_PTE_EXECUTABLE; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index b45fa0cea9d2..c5e2a2c41e06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -716,11 +716,15 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags &= ~AMDGPU_PTE_PRT; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c4d69cf4e06c..8404695eb13f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1073,27 +1073,6 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int v * 0 valid */ -static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) - -{ - switch (flags) { - case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_WC); - case AMDGPU_VM_MTYPE_RW: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_RW); - case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_CC); - case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC); - default: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); - } -} - static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { @@ -1123,6 +1102,7 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { struct amdgpu_device *bo_adev = 
amdgpu_ttm_adev(bo->tbo.bdev); @@ -1236,25 +1216,43 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - struct amdgpu_bo *bo = mapping->bo_va->base.bo; - - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; - *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK; + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_RW: + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW); + break; + case AMDGPU_VM_MTYPE_CC: + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC); + break; + } - if (mapping->flags & AMDGPU_PTE_PRT) { + if (vm_flags & AMDGPU_VM_PAGE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags &= ~AMDGPU_PTE_VALID; } if ((*flags & AMDGPU_PTE_VALID) && bo) - gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.vm, bo, - flags); + gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags); } static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, @@ -1391,7 +1389,6 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, - .map_mtype = gmc_v9_0_map_mtype, .get_vm_pde = gmc_v9_0_get_vm_pde, .get_vm_pte = gmc_v9_0_get_vm_pte, .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags, diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 5900b560b7de..333e9c30c091 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -587,8 +587,7 @@ static int ih_v6_0_sw_init(struct amdgpu_ip_block *ip_block) /* use gpu virtual address for ih ring * until ih_checken is programmed to allow * use bus address for ih ring by psp bl */ - use_bus_addr = - (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 068ed849dbad..95b3f4e55ec3 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -562,8 +562,7 @@ static int ih_v6_1_sw_init(struct amdgpu_ip_block *ip_block) /* use gpu virtual address for ih ring * until ih_checken is programmed to allow * use bus address for ih ring by psp bl */ - use_bus_addr = - (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? 
false : true; + use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index 40a3530e0453..b32ea4129c61 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -552,8 +552,7 @@ static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block) /* use gpu virtual address for ih ring * until ih_checken is programmed to allow * use bus address for ih ring by psp bl */ - use_bus_addr = - (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index a887df520414..4258d3e0b706 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -29,6 +29,8 @@ #include "amdgpu.h" #include "isp_v4_1_1.h" +MODULE_FIRMWARE("amdgpu/isp_4_1_1.bin"); + #define ISP_PERFORMANCE_STATE_LOW 0 #define ISP_PERFORMANCE_STATE_HIGH 1 diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index b86288a69e7b..a78144773fab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -444,7 +444,7 @@ static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) && !amdgpu_sriov_vf(adev)) amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 481d1a2dbe5a..5d86e1d846eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -686,7 +686,7 @@ static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index e0a71909252b..34c70270ea1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -584,7 +584,7 @@ static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = state == AMD_CG_STATE_GATE; if (enable) { if (!jpeg_v5_0_0_is_idle(ip_block)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 54523dc1f702..baf097d2e1ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -196,6 +196,14 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) } } + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) { + r = amdgpu_jpeg_ras_sw_init(adev); + if (r) { + dev_err(adev->dev, "Failed to initialize jpeg ras block!\n"); + return r; + } + } + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_1, ARRAY_SIZE(jpeg_reg_list_5_0_1)); if (r) return r; @@ -307,7 +315,7 @@ static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) && !amdgpu_sriov_vf(adev)) amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); return ret; @@ -689,7 +697,7 @@ static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; @@ -1016,8 +1024,9 @@ static int jpeg_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_ban /* reference to smu driver if header file */ static int jpeg_v5_0_1_err_codes[] = { - 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */ - 24, 25, 26, 27, 28, 29, 30, 31 + 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-9][S|D] */ + 24, 25, 26, 27, 28, 29, 30, 31, + 48, 49, 50, 51, }; static bool jpeg_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, @@ -1058,6 +1067,11 @@ static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_comm if (r) return r; + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, + &jpeg_v5_0_1_aca_info, NULL); + if (r) + goto late_fini; + if (amdgpu_ras_is_supported(adev, ras_block->block) && adev->jpeg.inst->ras_poison_irq.funcs) { r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0); @@ -1065,11 +1079,6 @@ static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_comm goto late_fini; } - r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, - &jpeg_v5_0_1_aca_info, NULL); - if (r) - goto late_fini; - return 0; late_fini: diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index d6f50b13e2ba..aee26f80bd53 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -21,6 +21,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "mes_userqueue.h" @@ -198,6 +199,53 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return 0; } +static int mes_userq_detect_and_reset(struct amdgpu_device *adev, + int queue_type) +{ + int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev); + struct mes_detect_and_reset_queue_input input; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + unsigned int hung_db_num = 0; + int queue_id, r, i; + u32 db_array[4]; + + if (db_array_size > 4) { + dev_err(adev->dev, "DB array size (%d vs 4) too small\n", + db_array_size); + return -EINVAL; + } + + memset(&input, 0x0, sizeof(struct mes_detect_and_reset_queue_input)); + + input.queue_type = queue_type; + + amdgpu_mes_lock(&adev->mes); + r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false, + &hung_db_num, db_array); + amdgpu_mes_unlock(&adev->mes); + if (r) { + dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r); + } else if (hung_db_num) { + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (queue->queue_type == queue_type) { + for (i = 0; i < hung_db_num; i++) { + if (queue->doorbell_index == db_array[i]) { + queue->state = AMDGPU_USERQ_STATE_HUNG; + atomic_inc(&adev->gpu_reset_counter); + amdgpu_userq_fence_driver_force_completion(queue); + drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL); + } + } + } + } + } + } + + return r; +} + static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct drm_amdgpu_userq_in *args_in, struct amdgpu_usermode_queue *queue) @@ -352,4 +400,5 @@ const struct amdgpu_userq_funcs userq_mes_funcs = { .mqd_destroy = mes_userq_mqd_destroy, .unmap = mes_userq_unmap, .map = mes_userq_map, + .detect_and_reset = mes_userq_detect_and_reset, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 28eb846280dd..3b91ea601add 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -66,6 +66,8 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define GFX_MES_DRAM_SIZE 0x80000 #define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE) +#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 4 + static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -641,8 +643,9 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; case MES_MISC_OP_CHANGE_CONFIG: if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { - dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); - return -EINVAL; + dev_warn_once(mes->adev->dev, + "MES FW version must be larger than 0x63 to support limit single process feature.\n"); + return 0; } misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; misc_pkt.change_config.opcode = @@ -783,6 +786,32 @@ static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__RESET, api_status)); } +static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes, + struct mes_detect_and_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + 
+ mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_reset_queue_pkt.doorbell_offset_addr = + mes->hung_queue_db_array_gpu_addr; + + if (input->detect_only) + mes_reset_queue_pkt.hang_detect_only = 1; + else + mes_reset_queue_pkt.hang_detect_then_reset = 1; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -792,6 +821,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, .reset_hw_queue = mes_v11_0_reset_hw_queue, + .detect_and_reset_hung_queues = mes_v11_0_detect_and_reset_hung_queues, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, @@ -1684,6 +1714,8 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; + adev->mes.hung_queue_db_array_size = + MES11_HUNG_DB_OFFSET_ARRAY_SIZE; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 6b222630f3fa..998893dff08e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -47,6 +47,8 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 +#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 4 + static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -108,6 +110,7 @@ static const char *mes_v12_0_opcodes[] = { "SET_SE_MODE", "SET_GANG_SUBMIT", "SET_HW_RSRC_1", + "INVALIDATE_TLBS", }; static const char *mes_v12_0_misc_opcodes[] = { @@ -567,13 +570,41 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { - return 0; + union MESAPI__SUSPEND mes_suspend_gang_pkt; + + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); + + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; + mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; + mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), + offsetof(union MESAPI__SUSPEND, api_status)); } static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, struct mes_resume_gang_input *input) { - return 0; + union MESAPI__RESUME mes_resume_gang_pkt; + + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); + + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; + mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, + &mes_resume_gang_pkt, 
sizeof(mes_resume_gang_pkt), + offsetof(union MESAPI__RESUME, api_status)); } static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) @@ -879,6 +910,74 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__RESET, api_status)); } +static int mes_v12_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes, + struct mes_detect_and_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_reset_queue_pkt.doorbell_offset_addr = + mes->hung_queue_db_array_gpu_addr; + + if (input->detect_only) + mes_reset_queue_pkt.hang_detect_only = 1; + else + mes_reset_queue_pkt.hang_detect_then_reset = 1; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + +static int mes_v12_inv_tlb_convert_hub_id(uint8_t id) +{ + /* + * MES doesn't support invalidate gc_hub on slave xcc individually + * master xcc will invalidate all gc_hub for the partition + */ + if (AMDGPU_IS_GFXHUB(id)) + return 0; + else if (AMDGPU_IS_MMHUB0(id)) + return 1; + else + return -EINVAL; + +} + +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes, + struct mes_inv_tlbs_pasid_input *input) +{ + union MESAPI__INV_TLBS mes_inv_tlbs; + int ret; + + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs)); + + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER; + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS; + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0; + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type; + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid; + + /*convert amdgpu_mes_hub_id to mes expected hub_id */ + ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id); + if (ret < 0) + return -EINVAL; + mes_inv_tlbs.invalidate_tlbs.hub_id = ret; + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE, + &mes_inv_tlbs, sizeof(mes_inv_tlbs), + offsetof(union MESAPI__INV_TLBS, api_status)); + +} + static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .add_hw_queue = mes_v12_0_add_hw_queue, .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -888,6 +987,8 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, .reset_hw_queue = mes_v12_0_reset_hw_queue, + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid, + .detect_and_reset_hung_queues = mes_v12_0_detect_and_reset_hung_queues, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, @@ -1793,6 +1894,8 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; + adev->mes.hung_queue_db_array_size = + MES12_HUNG_DB_OFFSET_ARRAY_SIZE; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { r = amdgpu_mes_init_microcode(adev, pipe); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 48101a34e049..9a40107a0869 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -292,14 +292,32 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) 
} } -static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work) +static void xgpu_ai_mailbox_req_bad_pages_work(struct work_struct *work) { - struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); if (down_read_trylock(&adev->reset_domain->sem)) { amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_request_bad_pages(adev); + up_read(&adev->reset_domain->sem); + } +} + +/** + * xgpu_ai_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information + * @work: pointer to the work_struct + * + * This work handler is triggered when bad pages are ready, and it reinitializes + * the data exchange region to retrieve updated bad page information from the host. + */ +static void xgpu_ai_mailbox_handle_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_init_data_exchange(adev); up_read(&adev->reset_domain->sem); } @@ -327,10 +345,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { + case IDH_RAS_BAD_PAGES_READY: + xgpu_ai_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.handle_bad_pages_work); + break; case IDH_RAS_BAD_PAGES_NOTIFICATION: xgpu_ai_mailbox_send_ack(adev); if (amdgpu_sriov_runtime(adev)) - schedule_work(&adev->virt.bad_pages_work); + schedule_work(&adev->virt.req_bad_pages_work); break; case IDH_UNRECOV_ERR_NOTIFICATION: xgpu_ai_mailbox_send_ack(adev); @@ -415,7 +438,8 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work); - INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work); + INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_ai_mailbox_req_bad_pages_work); + INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_ai_mailbox_handle_bad_pages_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index f6d8597452ed..457972aa5632 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -202,9 +202,6 @@ send_request: case IDH_REQ_RAS_CPER_DUMP: event = IDH_RAS_CPER_DUMP_READY; break; - case IDH_REQ_RAS_BAD_PAGES: - event = IDH_RAS_BAD_PAGES_READY; - break; default: break; } @@ -359,14 +356,32 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) } } -static void xgpu_nv_mailbox_bad_pages_work(struct work_struct *work) +static void xgpu_nv_mailbox_req_bad_pages_work(struct work_struct *work) { - struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); if (down_read_trylock(&adev->reset_domain->sem)) { amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_request_bad_pages(adev); + up_read(&adev->reset_domain->sem); + } +} + +/** + * xgpu_nv_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information + * @work: pointer to the work_struct + * + * This work 
handler is triggered when bad pages are ready, and it reinitializes + * the data exchange region to retrieve updated bad page information from the host. + */ +static void xgpu_nv_mailbox_handle_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_init_data_exchange(adev); up_read(&adev->reset_domain->sem); } @@ -397,10 +412,15 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { + case IDH_RAS_BAD_PAGES_READY: + xgpu_nv_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.handle_bad_pages_work); + break; case IDH_RAS_BAD_PAGES_NOTIFICATION: xgpu_nv_mailbox_send_ack(adev); if (amdgpu_sriov_runtime(adev)) - schedule_work(&adev->virt.bad_pages_work); + schedule_work(&adev->virt.req_bad_pages_work); break; case IDH_UNRECOV_ERR_NOTIFICATION: xgpu_nv_mailbox_send_ack(adev); @@ -485,7 +505,8 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work); - INIT_WORK(&adev->virt.bad_pages_work, xgpu_nv_mailbox_bad_pages_work); + INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_nv_mailbox_req_bad_pages_work); + INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_nv_mailbox_handle_bad_pages_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index d5002ff931d8..860bc5cb03c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -151,9 +151,9 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0 * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4 * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8 -+ * BIF_SDMA4_DOORBELL_RANGE: -+ * ARCTURUS: 0x3be0 -+ * ALDEBARAN: 0x3be4 + * BIF_SDMA4_DOORBELL_RANGE: + * ARCTURUS: 0x3be0 + * ALDEBARAN: 0x3be4 */ if (adev->asic_type == CHIP_ALDEBARAN && instance == 4) reg = instance + 0x4 + 0x1 + diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 83e9782aef39..8f4817404f10 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -31,5 +31,6 @@ extern const struct amdgpu_ip_block_version nv_common_ip_block; void nv_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); void nv_set_virt_ops(struct amdgpu_device *adev); +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 6cc05d36e359..64b240b51f1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -149,12 +149,12 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) int ret; int retry_loop; - for (retry_loop = 0; retry_loop < 10; retry_loop++) { + for (retry_loop = 0; retry_loop < 20; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); + 0x80000000, 0x8000FFFF, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -397,18 +397,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) msleep(500); - offset = 
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - - ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, - 0); - - if (ret) { - DRM_INFO("psp mode 1 reset failed!\n"); - return -EINVAL; - } - - DRM_INFO("psp mode1 reset succeed \n"); - return 0; } @@ -665,7 +653,8 @@ static const struct psp_funcs psp_v11_0_funcs = { .ring_get_wptr = psp_v11_0_ring_get_wptr, .ring_set_wptr = psp_v11_0_ring_set_wptr, .load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw, - .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw + .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw, + .wait_for_bootloader = psp_v11_0_wait_for_bootloader }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index e6d8eddda2bf..db6e41967f12 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1377,7 +1377,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(6, 0, 0): - if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq) + if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; case IP_VERSION(6, 0, 1): @@ -1385,11 +1385,11 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; case IP_VERSION(6, 0, 2): - if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq) + if ((adev->sdma.instance[0].fw_version >= 23) && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; case IP_VERSION(6, 0, 3): - if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq) + if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; case IP_VERSION(6, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 9e74c9822e62..9785fada4fa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -741,7 +741,6 @@ static void soc15_reg_base_init(struct amdgpu_device *adev) void soc15_set_virt_ops(struct amdgpu_device *adev) { adev->virt.ops = &xgpu_ai_virt_ops; - /* init soc15 reg base early enough so we can * request request full access for sriov before * set_ip_blocks. */ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index e590cbdd8de9..8dc32787d625 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -536,8 +536,11 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); - if ((hwid != MCA_UMC_HWID_V12_0) || (mcatype != MCA_UMC_MCATYPE_V12_0)) + /* The IP block decode of consumption is SMU */ + if (hwid != MCA_UMC_HWID_V12_0 || mcatype != MCA_UMC_MCATYPE_V12_0) { + con->umc_ecc_log.consumption_q_count++; return 0; + } if (!status) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 5dbaebb592b3..2e79a3afc774 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -623,7 +623,22 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev, * * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
* - * Initialize the hardware, boot up the VCPU and do some testing + * Initialize the hardware, boot up the VCPU and do some testing. + * + * On SI, the UVD is meant to be used in a specific power state, + * or alternatively the driver can manually enable its clock. + * In amdgpu we use the dedicated UVD power state when DPM is enabled. + * Calling amdgpu_dpm_enable_uvd makes DPM select the UVD power state + * for the SMU and afterwards enables the UVD clock. + * This is automatically done by amdgpu_uvd_ring_begin_use when work + * is submitted to the UVD ring. Here, we have to call it manually + * in order to power up UVD before firmware validation. + * + * Note that we must not disable the UVD clock here, as that would + * cause the ring test to fail. However, UVD is powered off + * automatically after the ring test: amdgpu_uvd_ring_end_use calls + * the UVD idle work handler which will disable the UVD clock when + * all fences are signalled. */ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) { @@ -633,6 +648,15 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) int r; uvd_v3_1_mc_resume(adev); + uvd_v3_1_enable_mgcg(adev, true); + + /* Make sure UVD is powered during FW validation. + * It's going to be automatically powered off after the ring test. + */ + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + else + amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); r = uvd_v3_1_fw_validate(adev); if (r) { @@ -640,9 +664,6 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) return r; } - uvd_v3_1_enable_mgcg(adev, true); - amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); - uvd_v3_1_start(adev); r = amdgpu_ring_test_helper(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 1c07b701d0e4..ceb94bbb03a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -217,7 +217,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -281,7 +282,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 9d237b5937fb..1f8866f3f63c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -225,7 +225,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -288,7 +289,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index c74947705d77..1e89ba153d9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1338,7 
+1338,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1399,7 +1398,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 68b4371df0f1..b115137ab2d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -136,8 +136,6 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); - uint32_t *ptr; struct amdgpu_device *adev = ip_block->adev; volatile struct amdgpu_fw_shared *fw_shared; @@ -232,14 +230,9 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(adev->vcn.inst); - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_0, ARRAY_SIZE(vcn_reg_list_2_0)); + if (r) + return r; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -276,8 +269,6 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) r = amdgpu_vcn_sw_fini(adev, 0); - kfree(adev->vcn.ip_dump); - return r; } @@ -865,6 +856,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; + int ret; vcn_v2_0_enable_static_power_gating(vinst); @@ -948,8 +940,13 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, 0, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, 0, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } /* force RBC into idle state */ rb_bufsz = order_base_2(ring->ring_size); @@ -2095,66 +2092,6 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } -static void vcn_v2_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - 
drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v2_0_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .name = "vcn_v2_0", .early_init = vcn_v2_0_early_init, @@ -2168,8 +2105,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .wait_for_idle = vcn_v2_0_wait_for_idle, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v2_0_dump_ip_state, - .print_ip_state = vcn_v2_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index bc30a5326866..904b94bc8693 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -116,7 +116,6 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work) struct amdgpu_device *adev = vcn_inst->adev; unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; unsigned int i, j; - int r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { struct amdgpu_vcn_inst *v = &adev->vcn.inst[i]; @@ -149,15 +148,7 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work) if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) { amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); - mutex_lock(&adev->vcn.workload_profile_mutex); - if (adev->vcn.workload_profile_active) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); - adev->vcn.workload_profile_active = false; - } - mutex_unlock(&adev->vcn.workload_profile_mutex); + amdgpu_vcn_put_profile(adev); } else { schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); } @@ -167,7 +158,6 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me]; - int r = 0; atomic_inc(&adev->vcn.inst[0].total_submission_cnt); @@ -177,20 +167,6 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) * the delayed work so there is no one else to set it to false * and we don't care if someone else sets it to true. 
*/ - if (adev->vcn.workload_profile_active) - goto pg_lock; - - mutex_lock(&adev->vcn.workload_profile_mutex); - if (!adev->vcn.workload_profile_active) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); - if (r) - dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); - adev->vcn.workload_profile_active = true; - } - mutex_unlock(&adev->vcn.workload_profile_mutex); - -pg_lock: mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); @@ -218,6 +194,7 @@ pg_lock: v->pause_dpg_mode(v, &new_state); } mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock); + amdgpu_vcn_get_profile(adev); } static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring) @@ -297,8 +274,6 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, j, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); - uint32_t *ptr; struct amdgpu_device *adev = ip_block->adev; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { @@ -423,14 +398,9 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_5, ARRAY_SIZE(vcn_reg_list_2_5)); + if (r) + return r; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -477,8 +447,6 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) return r; } - kfree(adev->vcn.ip_dump); - return 0; } @@ -1035,6 +1003,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1, @@ -1125,8 +1094,13 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) VCN, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_dec; /* force RBC into idle state */ @@ -2127,66 +2101,6 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v2_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_5[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive 
Instance:VCN%d\n", i); - } - } -} - -static void vcn_v2_5_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_5[j], i)); - } -} - static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .name = "vcn_v2_5", .early_init = vcn_v2_5_early_init, @@ -2200,8 +2114,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .wait_for_idle = vcn_v2_5_wait_for_idle, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v2_5_dump_ip_state, - .print_ip_state = vcn_v2_5_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -2217,8 +2131,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .wait_for_idle = vcn_v2_5_wait_for_idle, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v2_5_dump_ip_state, - .print_ip_state = vcn_v2_5_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 4b8f4407047f..f3085137ba08 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -175,8 +175,6 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t *ptr; struct amdgpu_device *adev = ip_block->adev; /* @@ -304,14 +302,9 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_3_0, ARRAY_SIZE(vcn_reg_list_3_0)); + if (r) + return r; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -361,7 +354,6 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) return r; } - kfree(adev->vcn.ip_dump); return 0; } @@ -1042,6 +1034,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1, @@ -1134,8 +1127,13 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) WREG32_SOC15_DPG_MODE(inst_idx, 
SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_dec; /* force RBC into idle state */ @@ -1888,15 +1886,19 @@ static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; + struct dma_fence *fence; /* if VCN0 is harvested, we can't support AV1 */ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) return -EINVAL; + /* wait for all jobs to finish before switching to instance 0 */ + fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull); + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; drm_sched_entity_modify_sched(job->base.entity, scheds, 1); @@ -2342,67 +2344,6 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v3_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t inst_off; - bool is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v3_0_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2416,8 +2357,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .wait_for_idle = vcn_v3_0_wait_for_idle, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v3_0_dump_ip_state, - .print_ip_state = vcn_v3_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version 
vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 1924e075b66f..bc9dfe5ffea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -183,8 +183,6 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; struct amdgpu_device *adev = ip_block->adev; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); - uint32_t *ptr; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) @@ -255,14 +253,9 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0, ARRAY_SIZE(vcn_reg_list_4_0)); + if (r) + return r; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -315,8 +308,6 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) return r; } - kfree(adev->vcn.ip_dump); - return 0; } @@ -1012,6 +1003,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -1094,8 +1086,13 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -1624,7 +1621,6 @@ static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { vcn_v4_0_stop_dpg_mode(vinst); - r = 0; goto done; } @@ -1808,15 +1804,19 @@ static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p, struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; + struct dma_fence *fence; /* if VCN0 is harvested, we can't support AV1 */ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) return -EINVAL; + /* wait for all jobs to finish before switching to instance 0 */ + fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull); + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] [AMDGPU_RING_PRIO_0].sched; drm_sched_entity_modify_sched(job->base.entity, scheds, 1); @@ -1907,22 +1907,16 @@ out: #define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) - #define RADEON_VCN_ENGINE_INFO (0x30000001) -#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 - #define RENCODE_ENCODE_STANDARD_AV1 2 #define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 -#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 -/* return the offset in ib if id is found, -1 otherwise - * to speed up the searching we only search upto max_offset - */ -static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) +/* return the 
offset in ib if id is found, -1 otherwise */ +static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start) { int i; - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { + for (i = start; i < ib->length_dw && ib->ptr[i] >= 8; i += ib->ptr[i] / 4) { if (ib->ptr[i + 1] == id) return i; } @@ -1937,33 +1931,29 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_vcn_decode_buffer *decode_buffer; uint64_t addr; uint32_t val; - int idx; + int idx = 0, sidx; /* The first instance can decode anything */ if (!ring->me) return 0; - /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ - idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, - RADEON_VCN_ENGINE_INFO_MAX_OFFSET); - if (idx < 0) /* engine info is missing */ - return 0; - - val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; - - if (!(decode_buffer->valid_buf_flag & 0x1)) - return 0; - - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo; - return vcn_v4_0_dec_msg(p, job, addr); - } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { - idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); - if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) - return vcn_v4_0_limit_sched(p, job); + while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx)) >= 0) { + val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; + + if (!(decode_buffer->valid_buf_flag & 0x1)) + return 0; + + addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo; + return vcn_v4_0_dec_msg(p, job, addr); + } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { + sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx); + if (sidx >= 0 && ib->ptr[sidx + 2] == RENCODE_ENCODE_STANDARD_AV1) + return vcn_v4_0_limit_sched(p, job); + } + idx += ib->ptr[idx] / 4; } return 0; } @@ -2246,67 +2236,6 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); - - if (!adev->vcn.ip_dump) - return; - - for 
(i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0[j], - i)); - } -} - static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .name = "vcn_v4_0", .early_init = vcn_v4_0_early_init, @@ -2320,8 +2249,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .wait_for_idle = vcn_v4_0_wait_for_idle, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v4_0_dump_ip_state, - .print_ip_state = vcn_v4_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 2a3663b551af..7b93a275ec4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -134,6 +134,19 @@ static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + if (amdgpu_dpm_reset_vcn_is_supported(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + return 0; +} + static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx) { struct amdgpu_vcn4_fw_shared *fw_shared; @@ -160,8 +173,6 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r, vcn_inst; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); - uint32_t *ptr; /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, @@ -213,10 +224,6 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; } - /* TODO: Add queue reset mask when FW fully supports it */ - adev->vcn.supported_reset = - amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -231,20 +238,11 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) } } - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } - - r = amdgpu_vcn_sysfs_reset_mask_init(adev); + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0_3, ARRAY_SIZE(vcn_reg_list_4_0_3)); if (r) return r; - return 0; + return amdgpu_vcn_sysfs_reset_mask_init(adev); } /** @@ -287,8 +285,6 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) return r; } - kfree(adev->vcn.ip_dump); - return 0; } @@ -391,7 +387,7 @@ static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + if 
(amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN) && !amdgpu_sriov_vf(adev)) amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0); return 0; @@ -851,7 +847,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; - int vcn_inst; + int vcn_inst, ret; uint32_t tmp; vcn_inst = GET_INST(VCN, inst_idx); @@ -944,8 +940,13 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst, VCN, 0, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -1872,71 +1873,10 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->ras_poison_irq.funcs = &vcn_v4_0_3_ras_irq_funcs; } -static void vcn_v4_0_3_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_3[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v4_0_3_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off, inst_id; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_id = GET_INST(VCN, i); - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, inst_id, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_3[j], - inst_id)); - } -} - static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .name = "vcn_v4_0_3", .early_init = vcn_v4_0_3_early_init, + .late_init = vcn_v4_0_3_late_init, .sw_init = vcn_v4_0_3_sw_init, .sw_fini = vcn_v4_0_3_sw_fini, .hw_init = vcn_v4_0_3_hw_init, @@ -1947,8 +1887,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .wait_for_idle = vcn_v4_0_3_wait_for_idle, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v4_0_3_dump_ip_state, - .print_ip_state = vcn_v4_0_3_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, 
}; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index caf2d95a85d4..6dbf33b26ee2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -147,9 +147,6 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; struct amdgpu_device *adev = ip_block->adev; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); - uint32_t *ptr; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -233,15 +230,9 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } - return 0; + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0_5, ARRAY_SIZE(vcn_reg_list_4_0_5)); + + return r; } /** @@ -284,8 +275,6 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block) return r; } - kfree(adev->vcn.ip_dump); - return 0; } @@ -926,6 +915,7 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -1006,8 +996,13 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, VCN, inst_idx, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -1596,7 +1591,7 @@ static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1704,67 +1699,6 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v4_0_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_5[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v4_0_5_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_5[j], - i)); - } -} - static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .name = "vcn_v4_0_5", .early_init = vcn_v4_0_5_early_init, @@ -1778,8 +1712,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .wait_for_idle = vcn_v4_0_5_wait_for_idle, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v4_0_5_dump_ip_state, - .print_ip_state = vcn_v4_0_5_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 07a6e9582880..536f06b81706 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -115,21 +115,6 @@ static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev) -{ - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t *ptr; - - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } -} - /** * vcn_v5_0_0_sw_init - sw init for VCN block * @@ -201,7 +186,9 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; - vcn_v5_0_0_alloc_ip_dump(adev); + r 
= amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_5_0, ARRAY_SIZE(vcn_reg_list_5_0)); + if (r) + return r; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -251,8 +238,6 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) return r; } - kfree(adev->vcn.ip_dump); - return 0; } @@ -713,6 +698,7 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -766,8 +752,13 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, VCN, inst_idx, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "%s: vcn sram load failed %d\n", __func__, ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -1320,7 +1311,7 @@ static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1428,67 +1419,6 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } -void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, - struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_5_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1502,8 +1432,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .wait_for_idle = vcn_v5_0_0_wait_for_idle, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, 
.set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v5_0_0_dump_ip_state, - .print_ip_state = vcn_v5_0_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h index b8927652bc50..51bbccd4360f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h @@ -32,11 +32,6 @@ #define VCN_VID_IP_ADDRESS 0x0 #define VCN_AON_IP_ADDRESS 0x30000 -void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev); -void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, - struct drm_printer *p); -void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block); - extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; #endif /* __VCN_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index cdefd7fcb0da..4b01e35ad7ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -40,6 +40,40 @@ #include <drm/drm_drv.h> +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0_1[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev); static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); @@ -163,7 +197,17 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) return r; } - vcn_v5_0_0_alloc_ip_dump(adev); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) { + r = amdgpu_vcn_ras_sw_init(adev); + if (r) { + dev_err(adev->dev, "Failed to initialize vcn ras block!\n"); + return r; + } + } + + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_5_0_1, ARRAY_SIZE(vcn_reg_list_5_0_1)); + if (r) + return r; return amdgpu_vcn_sysfs_reset_mask_init(adev); } @@ -209,8 +253,6 @@ static int 
vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vcn_sysfs_reset_mask_fini(adev); - kfree(adev->vcn.ip_dump); - return 0; } @@ -284,7 +326,7 @@ static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN) && !amdgpu_sriov_vf(adev)) amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0); return 0; @@ -605,7 +647,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; - int vcn_inst; + int vcn_inst, ret; uint32_t tmp; vcn_inst = GET_INST(VCN, inst_idx); @@ -666,8 +708,13 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, VCN, 0, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } /* resetting ring, fw should not check RB ring */ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; @@ -1475,8 +1522,8 @@ static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_1_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v5_0_0_dump_ip_state, - .print_ip_state = vcn_v5_0_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = { @@ -1557,7 +1604,7 @@ static int vcn_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank /* reference to smu driver if header file */ static int vcn_v5_0_1_err_codes[] = { - 14, 15, /* VCN */ + 14, 15, 47, /* VCN [D|V|S] */ }; static bool vcn_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, @@ -1603,6 +1650,13 @@ static int vcn_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_commo if (r) goto late_fini; + if (amdgpu_ras_is_supported(adev, ras_block->block) && + adev->vcn.inst->ras_poison_irq.funcs) { + r = amdgpu_irq_get(adev, &adev->vcn.inst->ras_poison_irq, 0); + if (r) + goto late_fini; + } + return 0; late_fini: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 828a9ceef1e7..8535a52a62ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1070,7 +1070,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, svm_range_list_lock_and_flush_work(&p->svms, current->mm); mutex_lock(&p->svms.lock); mmap_write_unlock(current->mm); - if (interval_tree_iter_first(&p->svms.objects, + + /* Skip a special case that allocates VRAM without VA, + * VA will be invalid of 0. 
+ */ + if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) && + interval_tree_iter_first(&p->svms.objects, args->va_addr >> PAGE_SHIFT, (args->va_addr + args->size - 1) >> PAGE_SHIFT)) { pr_err("Address: 0x%llx already allocated by SVM\n", @@ -2566,8 +2571,8 @@ static int criu_restore(struct file *filep, pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n", args->num_devices, args->num_bos, args->num_objects, args->priv_data_size); - if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size || - !args->num_devices || !args->num_bos) + if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data || + !args->priv_data_size || !args->num_devices) return -EINVAL; mutex_lock(&p->mutex); @@ -3252,8 +3257,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) int retcode = -EINVAL; bool ptrace_attached = false; - if (nr >= AMDKFD_CORE_IOCTL_COUNT) + if (nr >= AMDKFD_CORE_IOCTL_COUNT) { + retcode = -ENOTTY; goto err_i1; + } if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { u32 amdkfd_size; @@ -3266,8 +3273,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) asize = amdkfd_size; cmd = ioctl->cmd; - } else + } else { + retcode = -ENOTTY; goto err_i1; + } dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 2b0a830f5b29..fb3129883a4c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -46,11 +46,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev, int retval; union PM4_MES_TYPE_3_HEADER nop; - if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ)) - return false; - - pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ, - queue_size); + pr_debug("Initializing queue type %d size %d\n", type, queue_size); memset(&prop, 0, sizeof(prop)); memset(&nop, 0, sizeof(nop)); @@ -69,6 +65,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev, kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; break; default: + WARN(1, "Invalid queue type %d\n", type); dev_err(dev->adev->dev, "Invalid queue type %d\n", type); return false; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 79251f22b702..86315ecb6f1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -68,7 +68,8 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4 + num_bytes, AMDGPU_IB_POOL_DELAYED, - &job); + &job, + AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP); if (r) return r; @@ -260,20 +261,7 @@ static void svm_migrate_put_sys_page(unsigned long addr) put_page(page); } -static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) -{ - unsigned long upages = 0; - unsigned long i; - - for (i = 0; i < migrate->npages; i++) { - if (migrate->src[i] & MIGRATE_PFN_VALID && - !(migrate->src[i] & MIGRATE_PFN_MIGRATE)) - upages++; - } - return upages; -} - -static int +static long svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, dma_addr_t *scratch, uint64_t ttm_res_offset) @@ -282,7 +270,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, struct amdgpu_device 
*adev = node->adev; struct device *dev = adev->dev; struct amdgpu_res_cursor cursor; - uint64_t mpages = 0; + long mpages; dma_addr_t *src; uint64_t *dst; uint64_t i, j; @@ -296,6 +284,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); + mpages = 0; for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) { struct page *spage; @@ -356,13 +345,14 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, out_free_vram_pages: if (r) { pr_debug("failed %d to copy memory to vram\n", r); - for (i = 0; i < npages && mpages; i++) { + while (i-- && mpages) { if (!dst[i]) continue; svm_migrate_put_vram_page(adev, dst[i]); migrate->dst[i] = 0; mpages--; } + mpages = r; } #ifdef DEBUG_FORCE_MIXED_DOMAINS @@ -380,7 +370,7 @@ out_free_vram_pages: } #endif - return r; + return mpages; } static long @@ -395,7 +385,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, struct dma_fence *mfence = NULL; struct migrate_vma migrate = { 0 }; unsigned long cpages = 0; - unsigned long mpages = 0; + long mpages = 0; dma_addr_t *scratch; void *buf; int r = -ENOMEM; @@ -441,15 +431,17 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, else pr_debug("0x%lx pages collected\n", cpages); - r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); + mpages = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); - mpages = cpages - svm_migrate_unsuccessful_pages(&migrate); - pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", + if (mpages >= 0) + pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n", mpages, cpages, migrate.npages); + else + r = mpages; svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages); @@ -459,14 +451,13 @@ out_free: start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, trigger, r); out: - if (!r && mpages) { + if (!r && mpages > 0) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) WRITE_ONCE(pdd->page_in, pdd->page_in + mpages); - - return mpages; } - return r; + + return r ? 
r : mpages; } /** @@ -577,7 +568,7 @@ static void svm_migrate_page_free(struct page *page) } } -static int +static long svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, dma_addr_t *scratch, uint64_t npages) @@ -586,6 +577,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t *src; dma_addr_t *dst; struct page *dpage; + long mpages; uint64_t i = 0, j; uint64_t addr; int r = 0; @@ -598,6 +590,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, src = (uint64_t *)(scratch + npages); dst = scratch; + mpages = 0; for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) { struct page *spage; @@ -646,6 +639,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); + mpages++; j++; } @@ -655,13 +649,17 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, out_oom: if (r) { pr_debug("failed %d copy to ram\n", r); - while (i--) { + while (i-- && mpages) { + if (!migrate->dst[i]) + continue; svm_migrate_put_sys_page(dst[i]); migrate->dst[i] = 0; + mpages--; } + mpages = r; } - return r; + return mpages; } /** @@ -688,9 +686,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; - unsigned long upages = npages; unsigned long cpages = 0; - unsigned long mpages = 0; + long mpages = 0; struct amdgpu_device *adev = node->adev; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; @@ -736,7 +733,6 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, if (!cpages) { pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n", prange->start, prange->last); - upages = svm_migrate_unsuccessful_pages(&migrate); goto out_free; } if (cpages != npages) @@ -745,13 +741,15 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, else pr_debug("0x%lx pages collected\n", cpages); - r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, + mpages = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, scratch, npages); migrate_vma_pages(&migrate); - upages = svm_migrate_unsuccessful_pages(&migrate); - pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n", - upages, cpages, migrate.npages); + if (mpages >= 0) + pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n", + mpages, cpages, migrate.npages); + else + r = mpages; svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); @@ -764,8 +762,7 @@ out_free: start >> PAGE_SHIFT, end >> PAGE_SHIFT, node->id, 0, trigger, r); out: - if (!r && cpages) { - mpages = cpages - upages; + if (!r && mpages > 0) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) WRITE_ONCE(pdd->page_out, pdd->page_out + mpages); @@ -848,6 +845,9 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, } if (r >= 0) { + WARN_ONCE(prange->vram_pages < mpages, + "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).", + prange->vram_pages, mpages); prange->vram_pages -= mpages; /* prange does not have vram page set its actual_loc to system diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 67694bcd9464..d01ef5ac0766 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -111,7 +111,14 @@ #define KFD_KERNEL_QUEUE_SIZE 2048 -#define KFD_UNMAP_LATENCY_MS (4000) +/* KFD_UNMAP_LATENCY_MS is the timeout CP waiting for SDMA preemption. One XCC + * can be associated to 2 SDMA engines. queue_preemption_timeout_ms is the time + * driver waiting for CP returning the UNMAP_QUEUE fence. Thus the math is + * queue_preemption_timeout_ms = sdma_preemption_time * 2 + cp workload + * The format here makes CP workload 10% of total timeout + */ +#define KFD_UNMAP_LATENCY_MS \ + ((queue_preemption_timeout_ms - queue_preemption_timeout_ms / 10) >> 1) #define KFD_MAX_SDMA_QUEUES 128 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index a0f22ea6d15a..68ba239b2e5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1189,7 +1189,7 @@ svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b) } static uint64_t -svm_range_get_pte_flags(struct kfd_node *node, +svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm, struct svm_range *prange, int domain) { struct kfd_node *bo_node; @@ -1292,10 +1292,6 @@ svm_range_get_pte_flags(struct kfd_node *node, AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } - mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; - - if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO) - mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE; if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; @@ -1305,7 +1301,10 @@ svm_range_get_pte_flags(struct kfd_node *node, if (gc_ip_version >= IP_VERSION(12, 0, 0)) pte_flags |= AMDGPU_PTE_IS_PTE; - pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags); + amdgpu_gmc_get_vm_pte(node->adev, vm, NULL, mapping_flags, &pte_flags); + pte_flags |= AMDGPU_PTE_READABLE; + if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO)) + pte_flags |= AMDGPU_PTE_WRITEABLE; return pte_flags; } @@ -1412,7 +1411,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n", last_start, prange->start + i, last_domain ? "GPU" : "CPU"); - pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain); + pte_flags = svm_range_get_pte_flags(pdd->dev, vm, prange, last_domain); if (readonly) pte_flags &= ~AMDGPU_PTE_WRITEABLE; @@ -1714,6 +1713,29 @@ static int svm_range_validate_and_map(struct mm_struct *mm, next = min(vma->vm_end, end); npages = (next - addr) >> PAGE_SHIFT; + /* HMM requires at least READ permissions. If provided with PROT_NONE, + * unmap the memory. 
If it's not already mapped, this is a no-op + * If PROT_WRITE is provided without READ, warn first then unmap + */ + if (!(vma->vm_flags & VM_READ)) { + unsigned long e, s; + + svm_range_lock(prange); + if (vma->vm_flags & VM_WRITE) + pr_debug("VM_WRITE without VM_READ is not supported"); + s = max(start, prange->start); + e = min(end, prange->last); + if (e >= s) + r = svm_range_unmap_from_gpus(prange, s, e, + KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU); + svm_range_unlock(prange); + /* If unmap returns non-zero, we'll bail on the next for loop + * iteration, so just leave r and continue + */ + addr = next; + continue; + } + WRITE_ONCE(p->svms.faulting_task, current); r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, readonly, owner, NULL, @@ -4239,7 +4261,7 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, r = svm_range_get_attr(p, mm, start, size, nattrs, attrs); break; default: - r = EINVAL; + r = -EINVAL; break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 4ec73f33535e..5c98746eb72d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -530,6 +530,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", dev->gpu->kfd->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", + dev->gpu->xcp ? + dev->gpu->xcp->unique_id : dev->gpu->adev->unique_id); sysfs_show_32bit_prop(buffer, offs, "num_xcc", NUM_XCC(dev->gpu->xcc_mask)); @@ -1587,7 +1589,8 @@ static int kfd_dev_create_p2p_links(void) break; if (!dev->gpu || !dev->gpu->adev || (dev->gpu->kfd->hive_id && - dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) + dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id && + amdgpu_xgmi_get_is_sharing_enabled(dev->gpu->adev, new_dev->gpu->adev))) goto next; /* check if node(s) is/are peer accessible in one direction or bi-direction */ diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c index 8bc36f04b1b7..44009aa8216e 100644 --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c @@ -46,18 +46,29 @@ static const struct drm_driver amdgpu_xcp_driver = { static int8_t pdev_num; static struct xcp_device *xcp_dev[MAX_XCP_PLATFORM_DEVICE]; +static DEFINE_MUTEX(xcp_mutex); int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) { struct platform_device *pdev; struct xcp_device *pxcp_dev; char dev_name[20]; - int ret; + int ret, i; + + guard(mutex)(&xcp_mutex); if (pdev_num >= MAX_XCP_PLATFORM_DEVICE) return -ENODEV; - snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", pdev_num); + for (i = 0; i < MAX_XCP_PLATFORM_DEVICE; i++) { + if (!xcp_dev[i]) + break; + } + + if (i >= MAX_XCP_PLATFORM_DEVICE) + return -ENODEV; + + snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", i); pdev = platform_device_register_simple(dev_name, -1, NULL, 0); if (IS_ERR(pdev)) return PTR_ERR(pdev); @@ -73,8 +84,8 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) goto out_devres; } - xcp_dev[pdev_num] = pxcp_dev; - xcp_dev[pdev_num]->pdev = pdev; + xcp_dev[i] = pxcp_dev; + xcp_dev[i]->pdev = pdev; *ddev = &pxcp_dev->drm; pdev_num++; @@ -89,16 +100,43 @@ out_unregister: } EXPORT_SYMBOL(amdgpu_xcp_drm_dev_alloc); -void amdgpu_xcp_drv_release(void) +static void free_xcp_dev(int8_t index) { - for (--pdev_num; pdev_num >= 0; --pdev_num) { - struct platform_device *pdev = 
xcp_dev[pdev_num]->pdev; + if ((index < MAX_XCP_PLATFORM_DEVICE) && (xcp_dev[index])) { + struct platform_device *pdev = xcp_dev[index]->pdev; devres_release_group(&pdev->dev, NULL); platform_device_unregister(pdev); - xcp_dev[pdev_num] = NULL; + + xcp_dev[index] = NULL; + pdev_num--; + } +} + +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev) +{ + int8_t i; + + guard(mutex)(&xcp_mutex); + + for (i = 0; i < MAX_XCP_PLATFORM_DEVICE; i++) { + if ((xcp_dev[i]) && (&xcp_dev[i]->drm == ddev)) { + free_xcp_dev(i); + break; + } + } +} +EXPORT_SYMBOL(amdgpu_xcp_drm_dev_free); + +void amdgpu_xcp_drv_release(void) +{ + int8_t i; + + guard(mutex)(&xcp_mutex); + + for (i = 0; pdev_num && i < MAX_XCP_PLATFORM_DEVICE; i++) { + free_xcp_dev(i); } - pdev_num = 0; } EXPORT_SYMBOL(amdgpu_xcp_drv_release); diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h index c1c4b679bf95..580a1602c8e3 100644 --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h @@ -25,5 +25,6 @@ #define _AMDGPU_XCP_DRV_H_ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev); +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev); void amdgpu_xcp_drv_release(void); #endif /* _AMDGPU_XCP_DRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index 89d605de0595..0084a8d55254 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -44,6 +44,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/mmhubbub subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/mpc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/opp subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/pg +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/soc_and_ip_translator subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index cd0e2976e268..62defeccbb5c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. 
* @@ -39,13 +40,11 @@ #include "dc/dc_stat.h" #include "dc/dc_state.h" #include "amdgpu_dm_trace.h" -#include "dpcd_defs.h" #include "link/protocols/link_dpcd.h" #include "link_service_types.h" #include "link/protocols/link_dp_capability.h" #include "link/protocols/link_ddc.h" -#include "vid.h" #include "amdgpu.h" #include "amdgpu_display.h" #include "amdgpu_ucode.h" @@ -56,7 +55,6 @@ #include "amdgpu_dm_hdcp.h" #include <drm/display/drm_hdcp_helper.h> #include "amdgpu_dm_wb.h" -#include "amdgpu_pm.h" #include "amdgpu_atombios.h" #include "amd_shared.h" @@ -82,6 +80,7 @@ #include <linux/component.h> #include <linux/sort.h> +#include <drm/drm_privacy_screen_consumer.h> #include <drm/display/drm_dp_mst_helper.h> #include <drm/display/drm_hdmi_helper.h> #include <drm/drm_atomic.h> @@ -102,15 +101,6 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -#include "dcn/dcn_1_0_offset.h" -#include "dcn/dcn_1_0_sh_mask.h" -#include "soc15_hw_ip.h" -#include "soc15_common.h" -#include "vega10_ip_offset.h" - -#include "gc/gc_11_0_0_offset.h" -#include "gc/gc_11_0_0_sh_mask.h" - #include "modules/inc/mod_freesync.h" #include "modules/power/power_helpers.h" @@ -541,6 +531,50 @@ static void dm_pflip_high_irq(void *interrupt_params) amdgpu_crtc->crtc_id, amdgpu_crtc, vrr_active, (int)!e); } +static void dm_handle_vmin_vmax_update(struct work_struct *offload_work) +{ + struct vupdate_offload_work *work = container_of(offload_work, struct vupdate_offload_work, work); + struct amdgpu_device *adev = work->adev; + struct dc_stream_state *stream = work->stream; + struct dc_crtc_timing_adjust *adjust = work->adjust; + + mutex_lock(&adev->dm.dc_lock); + dc_stream_adjust_vmin_vmax(adev->dm.dc, stream, adjust); + mutex_unlock(&adev->dm.dc_lock); + + dc_stream_release(stream); + kfree(work->adjust); + kfree(work); +} + +static void schedule_dc_vmin_vmax(struct amdgpu_device *adev, + struct dc_stream_state *stream, + struct dc_crtc_timing_adjust *adjust) +{ + struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_KERNEL); + if (!offload_work) { + drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n"); + return; + } + + struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_KERNEL); + if (!adjust_copy) { + drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n"); + kfree(offload_work); + return; + } + + dc_stream_retain(stream); + memcpy(adjust_copy, adjust, sizeof(*adjust_copy)); + + INIT_WORK(&offload_work->work, dm_handle_vmin_vmax_update); + offload_work->adev = adev; + offload_work->stream = stream; + offload_work->adjust = adjust_copy; + + queue_work(system_wq, &offload_work->work); +} + static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -578,22 +612,27 @@ static void dm_vupdate_high_irq(void *interrupt_params) * page-flip completion events that have been queued to us * if a pageflip happened inside front-porch. 
*/ - if (vrr_active) { + if (vrr_active && acrtc->dm_irq_params.stream) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state + == VRR_STATE_ACTIVE_VARIABLE; + amdgpu_dm_crtc_handle_vblank(acrtc); /* BTR processing for pre-DCE12 ASICs */ - if (acrtc->dm_irq_params.stream && - adev->family < AMDGPU_FAMILY_AI) { + if (adev->family < AMDGPU_FAMILY_AI) { spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); mod_freesync_handle_v_update( adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax( - adev->dm.dc, - acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + schedule_dc_vmin_vmax(adev, + acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); } } @@ -676,15 +715,20 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - acrtc->dm_irq_params.vrr_params.supported && - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE) { + acrtc->dm_irq_params.vrr_params.supported) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; + mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */ + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + schedule_dc_vmin_vmax(adev, acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } } /* @@ -2142,7 +2186,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) { drm_err(adev_to_drm(adev), - "failed to initialize sw for display support.\n"); + "failed to initialize vblank for display support.\n"); goto error; } @@ -2913,6 +2957,17 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) return 0; } +static void dm_oem_i2c_hw_fini(struct amdgpu_device *adev) +{ + struct amdgpu_display_manager *dm = &adev->dm; + + if (dm->oem_i2c) { + i2c_del_adapter(&dm->oem_i2c->base); + kfree(dm->oem_i2c); + dm->oem_i2c = NULL; + } +} + /** * dm_hw_init() - Initialize DC device * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
@@ -2963,7 +3018,7 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - kfree(adev->dm.oem_i2c); + dm_oem_i2c_hw_fini(adev); amdgpu_dm_hpd_fini(adev); @@ -3127,25 +3182,6 @@ static void dm_destroy_cached_state(struct amdgpu_device *adev) dm->cached_state = NULL; } -static void dm_complete(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - dm_destroy_cached_state(adev); -} - -static int dm_prepare_suspend(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - if (amdgpu_in_reset(adev)) - return 0; - - WARN_ON(adev->dm.cached_state); - - return dm_cache_state(adev); -} - static int dm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -3571,10 +3607,8 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { .early_fini = amdgpu_dm_early_fini, .hw_init = dm_hw_init, .hw_fini = dm_hw_fini, - .prepare_suspend = dm_prepare_suspend, .suspend = dm_suspend, .resume = dm_resume, - .complete = dm_complete, .is_idle = dm_is_idle, .wait_for_idle = dm_wait_for_idle, .check_soft_reset = dm_check_soft_reset, @@ -3612,18 +3646,20 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) { + const struct drm_panel_backlight_quirk *panel_backlight_quirk; struct amdgpu_dm_backlight_caps *caps; struct drm_connector *conn_base; struct amdgpu_device *adev; struct drm_luminance_range_info *luminance_range; - int min_input_signal_override; + struct drm_device *drm; if (aconnector->bl_idx == -1 || aconnector->dc_link->connector_signal != SIGNAL_TYPE_EDP) return; conn_base = &aconnector->base; - adev = drm_to_adev(conn_base->dev); + drm = conn_base->dev; + adev = drm_to_adev(drm); caps = &adev->dm.backlight_caps[aconnector->bl_idx]; caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps; @@ -3656,9 +3692,24 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) else caps->aux_min_input_signal = 1; - min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid); - if (min_input_signal_override >= 0) - caps->min_input_signal = min_input_signal_override; + panel_backlight_quirk = + drm_get_panel_backlight_quirk(aconnector->drm_edid); + if (!IS_ERR_OR_NULL(panel_backlight_quirk)) { + if (panel_backlight_quirk->min_brightness) { + caps->min_input_signal = + panel_backlight_quirk->min_brightness - 1; + drm_info(drm, + "Applying panel backlight quirk, min_brightness: %d\n", + caps->min_input_signal); + } + if (panel_backlight_quirk->brightness_mask) { + drm_info(drm, + "Applying panel backlight quirk, brightness_mask: 0x%X\n", + panel_backlight_quirk->brightness_mask); + caps->brightness_mask = + panel_backlight_quirk->brightness_mask; + } + } } DEFINE_FREE(sink_release, struct dc_sink *, if (_T) dc_sink_release(_T)) @@ -4773,8 +4824,8 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap uint32_t *user_brightness) { u32 brightness = scale_input_to_fw(min, max, *user_brightness); - u8 prev_signal = 0, prev_lum = 0; - int i = 0; + u8 lower_signal, upper_signal, upper_lum, lower_lum, lum; + int left, right; if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE) return; @@ -4782,32 +4833,44 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap if (!caps->data_points) return; - /* choose start to run less interpolation steps */ - if 
(caps->luminance_data[caps->data_points/2].input_signal > brightness) - i = caps->data_points/2; - do { - u8 signal = caps->luminance_data[i].input_signal; - u8 lum = caps->luminance_data[i].luminance; + left = 0; + right = caps->data_points - 1; + while (left <= right) { + int mid = left + (right - left) / 2; + u8 signal = caps->luminance_data[mid].input_signal; - /* - * brightness == signal: luminance is percent numerator - * brightness < signal: interpolate between previous and current luminance numerator - * brightness > signal: find next data point - */ - if (brightness > signal) { - prev_signal = signal; - prev_lum = lum; - i++; - continue; + /* Exact match found */ + if (signal == brightness) { + lum = caps->luminance_data[mid].luminance; + goto scale; } - if (brightness < signal) - lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * - (brightness - prev_signal), - signal - prev_signal); - *user_brightness = scale_fw_to_input(min, max, - DIV_ROUND_CLOSEST(lum * brightness, 101)); - return; - } while (i < caps->data_points); + + if (signal < brightness) + left = mid + 1; + else + right = mid - 1; + } + + /* verify bound */ + if (left >= caps->data_points) + left = caps->data_points - 1; + + /* At this point, left > right */ + lower_signal = caps->luminance_data[right].input_signal; + upper_signal = caps->luminance_data[left].input_signal; + lower_lum = caps->luminance_data[right].luminance; + upper_lum = caps->luminance_data[left].luminance; + + /* interpolate */ + if (right == left || !lower_lum) + lum = upper_lum; + else + lum = lower_lum + DIV_ROUND_CLOSEST((upper_lum - lower_lum) * + (brightness - lower_signal), + upper_signal - lower_signal); +scale: + *user_brightness = scale_fw_to_input(min, max, + DIV_ROUND_CLOSEST(lum * brightness, 101)); } static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps, @@ -4858,6 +4921,10 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, brightness = convert_brightness_from_user(caps, dm->brightness[bl_idx]); link = (struct dc_link *)dm->backlight_link[bl_idx]; + /* Apply brightness quirk */ + if (caps->brightness_mask) + brightness |= caps->brightness_mask; + /* Change brightness based on AUX property */ mutex_lock(&dm->dc_lock); if (dm->dc->caps.ips_support && dm->dc->ctx->dmub_srv->idle_allowed) { @@ -4926,10 +4993,8 @@ static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm, if (caps.aux_support) { u32 avg, peak; - bool rc; - rc = dc_link_get_backlight_level_nits(link, &avg, &peak); - if (!rc) + if (!dc_link_get_backlight_level_nits(link, &avg, &peak)) return dm->brightness[bl_idx]; return convert_brightness_to_user(&caps, avg); } @@ -4995,8 +5060,11 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) } else props.brightness = props.max_brightness = MAX_BACKLIGHT_LEVEL; - if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) + if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) { drm_info(drm, "Using custom brightness curve\n"); + props.scale = BACKLIGHT_SCALE_NON_LINEAR; + } else + props.scale = BACKLIGHT_SCALE_LINEAR; props.type = BACKLIGHT_RAW; snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d", @@ -6390,13 +6458,15 @@ static void fill_stream_properties_from_drm_display_mode( (struct drm_connector *)connector, mode_in); if (err < 0) - drm_warn_once(connector->dev, "Failed to setup avi infoframe on connector %s: %zd \n", connector->name, err); + 
drm_warn_once(connector->dev, "Failed to setup avi infoframe on connector %s: %zd\n", + connector->name, err); timing_out->vic = avi_frame.video_code; err = drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in); if (err < 0) - drm_warn_once(connector->dev, "Failed to setup vendor infoframe on connector %s: %zd \n", connector->name, err); + drm_warn_once(connector->dev, "Failed to setup vendor infoframe on connector %s: %zd\n", + connector->name, err); timing_out->hdmi_vic = hv_frame.vic; } @@ -7792,6 +7862,9 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn, struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(conn); int ret; + if (WARN_ON(unlikely(!old_con_state || !new_con_state))) + return -EINVAL; + trace_amdgpu_dm_connector_atomic_check(new_con_state); if (conn->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { @@ -7803,6 +7876,14 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn, if (!crtc) return 0; + if (new_con_state->privacy_screen_sw_state != old_con_state->privacy_screen_sw_state) { + new_crtc_state = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(new_crtc_state)) + return PTR_ERR(new_crtc_state); + + new_crtc_state->mode_changed = true; + } + if (new_con_state->colorspace != old_con_state->colorspace) { new_crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(new_crtc_state)) @@ -7934,7 +8015,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link)); + mst_state->pbn_div.full = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link); if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; @@ -8378,8 +8459,7 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 1920, 1080); } else { amdgpu_dm_connector_ddc_get_modes(connector, drm_edid); - if (encoder && (connector->connector_type != DRM_MODE_CONNECTOR_eDP) && - (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)) + if (encoder) amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, drm_edid); } @@ -8498,6 +8578,18 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, if (adev->dm.hdcp_workqueue) drm_connector_attach_content_protection_property(&aconnector->base, true); } + + if (connector_type == DRM_MODE_CONNECTOR_eDP) { + struct drm_privacy_screen *privacy_screen; + + privacy_screen = drm_privacy_screen_get(adev_to_drm(adev)->dev, NULL); + if (!IS_ERR(privacy_screen)) { + drm_connector_attach_privacy_screen_provider(&aconnector->base, + privacy_screen); + } else if (PTR_ERR(privacy_screen) != -ENODEV) { + drm_warn(adev_to_drm(adev), "Error getting privacy-screen\n"); + } + } } static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, @@ -10086,69 +10178,40 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, drm_writeback_queue_job(wb_conn, new_con_state); } -/** - * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. - * @state: The atomic state to commit - * - * This will tell DC to commit the constructed DC state from atomic_check, - * programming the hardware. Any failures here implies a hardware failure, since - * atomic check should have filtered anything non-kosher. 
- */ -static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +static void amdgpu_dm_update_hdcp(struct drm_atomic_state *state) { + struct drm_connector_state *old_con_state, *new_con_state; struct drm_device *dev = state->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_display_manager *dm = &adev->dm; - struct dm_atomic_state *dm_state; - struct dc_state *dc_state = NULL; - u32 i, j; - struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; - unsigned long flags; - bool wait_for_vblank = true; struct drm_connector *connector; - struct drm_connector_state *old_con_state, *new_con_state; - struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; - int crtc_disable_count = 0; - - trace_amdgpu_dm_atomic_commit_tail_begin(state); - - drm_atomic_helper_update_legacy_modeset_state(dev, state); - drm_dp_mst_atomic_wait_for_dependencies(state); + struct amdgpu_device *adev = drm_to_adev(dev); + int i; - dm_state = dm_atomic_get_new_state(state); - if (dm_state && dm_state->context) { - dc_state = dm_state->context; - amdgpu_dm_commit_streams(state, dc_state); - } + if (!adev->dm.hdcp_workqueue) + return; for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct dm_crtc_state *dm_new_crtc_state; struct amdgpu_dm_connector *aconnector; - if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + if (!connector || connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; aconnector = to_amdgpu_dm_connector(connector); - if (!adev->dm.hdcp_workqueue) - continue; - - pr_debug("[HDCP_DM] -------------- i : %x ----------\n", i); - - if (!connector) - continue; + drm_dbg(dev, "[HDCP_DM] -------------- i : %x ----------\n", i); - pr_debug("[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n", + drm_dbg(dev, "[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n", connector->index, connector->status, connector->dpms); - pr_debug("[HDCP_DM] state protection old: %x new: %x\n", + drm_dbg(dev, "[HDCP_DM] state protection old: %x new: %x\n", old_con_state->content_protection, new_con_state->content_protection); if (aconnector->dc_sink) { if (aconnector->dc_sink->sink_signal != SIGNAL_TYPE_VIRTUAL && aconnector->dc_sink->sink_signal != SIGNAL_TYPE_NONE) { - pr_debug("[HDCP_DM] pipe_ctx dispname=%s\n", + drm_dbg(dev, "[HDCP_DM] pipe_ctx dispname=%s\n", aconnector->dc_sink->edid_caps.display_name); } } @@ -10162,7 +10225,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } if (old_crtc_state) - pr_debug("old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n", + drm_dbg(dev, "old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n", old_crtc_state->enable, old_crtc_state->active, old_crtc_state->mode_changed, @@ -10170,29 +10233,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) old_crtc_state->connectors_changed); if (new_crtc_state) - pr_debug("NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n", + drm_dbg(dev, "NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n", new_crtc_state->enable, new_crtc_state->active, new_crtc_state->mode_changed, new_crtc_state->active_changed, new_crtc_state->connectors_changed); - } - - for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { - struct 
dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - if (!adev->dm.hdcp_workqueue) - continue; - - new_crtc_state = NULL; - old_crtc_state = NULL; - - if (acrtc) { - new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); - old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); - } dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); @@ -10236,7 +10283,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED) enable_encryption = true; - drm_info(adev_to_drm(adev), "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); + drm_info(dev, "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); if (aconnector->dc_link) hdcp_update_display( @@ -10244,6 +10291,45 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) new_con_state->hdcp_content_type, enable_encryption); } } +} + +/** + * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. + * @state: The atomic state to commit + * + * This will tell DC to commit the constructed DC state from atomic_check, + * programming the hardware. Any failures here implies a hardware failure, since + * atomic check should have filtered anything non-kosher. + */ +static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_display_manager *dm = &adev->dm; + struct dm_atomic_state *dm_state; + struct dc_state *dc_state = NULL; + u32 i, j; + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + unsigned long flags; + bool wait_for_vblank = true; + struct drm_connector *connector; + struct drm_connector_state *old_con_state = NULL, *new_con_state = NULL; + struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; + int crtc_disable_count = 0; + + trace_amdgpu_dm_atomic_commit_tail_begin(state); + + drm_atomic_helper_update_legacy_modeset_state(dev, state); + drm_dp_mst_atomic_wait_for_dependencies(state); + + dm_state = dm_atomic_get_new_state(state); + if (dm_state && dm_state->context) { + dc_state = dm_state->context; + amdgpu_dm_commit_streams(state, dc_state); + } + + amdgpu_dm_update_hdcp(state); /* Handle connector state changes */ for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { @@ -10346,6 +10432,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) &stream_update); mutex_unlock(&dm->dc_lock); kfree(dummy_updates); + + drm_connector_update_privacy_screen(new_con_state); } /** @@ -10397,6 +10485,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) if (amdgpu_dm_crc_window_is_activated(crtc)) { uint8_t cnt; + spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); for (cnt = 0; cnt < MAX_CRC_WINDOW_NUM; cnt++) { if (acrtc->dm_irq_params.window_param[cnt].enable) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b937da0a4e4a..159f8ded0439 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright (C) 
2015-2020 Advanced Micro Devices, Inc. All rights reserved. * @@ -152,6 +153,20 @@ struct idle_workqueue { bool running; }; +/** + * struct vupdate_offload_work - Work data for offloading task from vupdate handler + * @work: Kernel work data for the work event + * @adev: amdgpu_device back pointer + * @stream: DC stream associated with the crtc + * @adjust: DC CRTC timing adjust to be applied to the crtc + */ +struct vupdate_offload_work { + struct work_struct work; + struct amdgpu_device *adev; + struct dc_stream_state *stream; + struct dc_crtc_timing_adjust *adjust; +}; + #define MAX_LUMINANCE_DATA_POINTS 99 /** @@ -201,6 +216,11 @@ struct amdgpu_dm_backlight_caps { */ bool aux_support; /** + * @brightness_mask: After deriving brightness, OR it with this mask. + * Workaround for panels with issues with certain brightness values. + */ + u32 brightness_mask; + /** * @ac_level: the default brightness if booted on AC */ u8 ac_level; @@ -753,6 +773,9 @@ struct amdgpu_dm_connector { uint16_t vc_full_pbn; struct mutex handle_mst_msg_ready; + /* branch device specific data */ + uint32_t branch_ieee_oui; + /* TODO see if we can merge with ddc_bus or make a dm_connector */ struct amdgpu_i2c_adapter *i2c; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f..c7387af725d6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * @@ -27,7 +28,6 @@ #include "amdgpu_dm.h" #include "dc.h" #include "modules/color/color_gamma.h" -#include "basics/conversion.h" /** * DOC: overview diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 033bd817d871..e20aa7438066 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 3da056c8d20b..95bdb8699d7f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2019 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 010172f930ae..45feb404b097 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -299,6 +299,25 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); if (enable) { + struct dc *dc = adev->dm.dc; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + struct psr_settings *psr = &acrtc_state->stream->link->psr_settings; + struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; + bool sr_supported = (psr->psr_version != DC_PSR_VERSION_UNSUPPORTED) || + pr->config.replay_supported; + + /* + * IPS & self-refresh feature can cause vblank counter resets between + * vblank disable and enable. + * It may cause system stuck due to waiting for the vblank counter. 
+ * Call this function to estimate missed vblanks by using timestamps and + * update the vblank counter in DRM. + */ + if (dc->caps.ips_support && + dc->config.disable_ips != DMUB_IPS_DISABLE_ALL && + sr_supported && vblank->config.disable_immediate) + drm_crtc_vblank_restore(crtc); + /* vblank irq on -> Only need vupdate irq in vrr mode */ if (amdgpu_dm_crtc_vrr_active(acrtc_state)) rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index b726bcd18e29..f263e1a4537e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * @@ -3106,6 +3107,35 @@ static int replay_get_state(void *data, u64 *val) } /* + * Start / Stop capture Replay residency + */ +static int replay_set_residency(void *data, u64 val) +{ + struct amdgpu_dm_connector *connector = data; + struct dc_link *link = connector->dc_link; + bool is_start = (val != 0); + u32 residency = 0; + + link->dc->link_srv->edp_replay_residency(link, &residency, is_start, PR_RESIDENCY_MODE_PHY); + return 0; +} + +/* + * Read Replay residency + */ +static int replay_get_residency(void *data, u64 *val) +{ + struct amdgpu_dm_connector *connector = data; + struct dc_link *link = connector->dc_link; + u32 residency = 0; + + link->dc->link_srv->edp_replay_residency(link, &residency, false, PR_RESIDENCY_MODE_PHY); + *val = (u64)residency; + + return 0; +} + +/* * Read PSR state */ static int psr_get(void *data, u64 *val) @@ -3324,7 +3354,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_g dmcub_trace_event_state_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(replay_state_fops, replay_get_state, NULL, "%llu\n"); - +DEFINE_DEBUGFS_ATTRIBUTE(replay_residency_fops, replay_get_residency, replay_set_residency, + "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(psr_residency_fops, psr_read_residency, NULL, "%llu\n"); @@ -3502,6 +3533,8 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) debugfs_create_file("replay_capability", 0444, dir, connector, &replay_capability_fops); debugfs_create_file("replay_state", 0444, dir, connector, &replay_state_fops); + debugfs_create_file_unsafe("replay_residency", 0444, dir, + connector, &replay_residency_fops); debugfs_create_file_unsafe("psr_capability", 0444, dir, connector, &psr_capability_fops); debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops); debugfs_create_file_unsafe("psr_residency", 0444, dir, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h index 071200473c27..122cdc124b3b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2018 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index b1d1897f5eaf..58e084f52526 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2019 Advanced Micro Devices, Inc. 
* @@ -222,6 +223,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, display_adjust.disable = MOD_HDCP_DISPLAY_NOT_DISABLE; link_adjust.auth_delay = 2; + link_adjust.retry_limit = MAX_NUM_OF_ATTEMPTS; if (content_type == DRM_MODE_HDCP_CONTENT_TYPE0) { link_adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0; @@ -571,6 +573,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) link->dp.usb4_enabled = config->usb4_enabled; display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; link->adjust.auth_delay = 2; + link->adjust.retry_limit = MAX_NUM_OF_ATTEMPTS; link->adjust.hdcp1.disable = 0; hdcp_w->encryption_status[display->index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h index 69b445b011c8..4faa344f196e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2019 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 9e3e51a2dc49..fe100e4c9801 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index b61e210f6246..a1c722112c22 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h index ba17c23b2706..4f6b58f4f90d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2015 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h index 6c9de834455b..3c9995275cbd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2020 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7187d5aedf0a..5e92eaa67aa3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2012-15 Advanced Micro Devices, Inc. 
* @@ -329,6 +330,34 @@ static bool retrieve_downstream_port_device(struct amdgpu_dm_connector *aconnect return true; } +static bool retrieve_branch_specific_data(struct amdgpu_dm_connector *aconnector) +{ + struct drm_connector *connector = &aconnector->base; + struct drm_dp_mst_port *port = aconnector->mst_output_port; + struct drm_dp_mst_port *port_parent; + struct drm_dp_aux *immediate_upstream_aux; + struct drm_dp_desc branch_desc; + + if (!port->parent) + return false; + + port_parent = port->parent->port_parent; + + immediate_upstream_aux = port_parent ? &port_parent->aux : port->mgr->aux; + + if (drm_dp_read_desc(immediate_upstream_aux, &branch_desc, true)) + return false; + + aconnector->branch_ieee_oui = (branch_desc.ident.oui[0] << 16) + + (branch_desc.ident.oui[1] << 8) + + (branch_desc.ident.oui[2]); + + drm_dbg_dp(port->aux.drm_dev, "MST branch oui 0x%x detected at %s\n", + aconnector->branch_ieee_oui, connector->name); + + return true; +} + static int dm_dp_mst_get_modes(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -668,6 +697,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_connector_set_path_property(connector, pathprop); + if (!retrieve_branch_specific_data(aconnector)) + aconnector->branch_ieee_oui = 0; + /* * Initialize connector state before adding the connectror to drm and * framebuffer lists @@ -809,6 +841,7 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, drm_dp_aux_init(&aconnector->dm_dp_aux.aux); drm_dp_cec_register_connector(&aconnector->dm_dp_aux.aux, &aconnector->base); + drm_dp_dpcd_set_probe(&aconnector->dm_dp_aux.aux, false); if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_eDP) return; @@ -821,13 +854,20 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, drm_connector_attach_dp_subconnector_property(&aconnector->base); } -int dm_mst_get_pbn_divider(struct dc_link *link) +uint32_t dm_mst_get_pbn_divider(struct dc_link *link) { + uint32_t pbn_div_x100; + uint64_t dividend, divisor; + if (!link) return 0; - return dc_link_bandwidth_kbps(link, - dc_link_get_link_cap(link)) / (8 * 1000 * 54); + dividend = (uint64_t)dc_link_bandwidth_kbps(link, dc_link_get_link_cap(link)) * 100; + divisor = 8 * 1000 * 54; + + pbn_div_x100 = div64_u64(dividend, divisor); + + return dfixed_const(pbn_div_x100) / 100; } struct dsc_mst_fairness_params { @@ -1762,14 +1802,20 @@ static bool dp_get_link_current_set_bw(struct drm_dp_aux *aux, uint32_t *cur_lin union lane_count_set lane_count; u8 dp_link_encoding; u8 link_bw_set = 0; + u8 data[16] = {0}; *cur_link_bw = 0; - if (drm_dp_dpcd_read(aux, DP_MAIN_LINK_CHANNEL_CODING_SET, &dp_link_encoding, 1) != 1 || - drm_dp_dpcd_read(aux, DP_LANE_COUNT_SET, &lane_count.raw, 1) != 1 || - drm_dp_dpcd_read(aux, DP_LINK_BW_SET, &link_bw_set, 1) != 1) + if (drm_dp_dpcd_read(aux, DP_LINK_BW_SET, data, 16) != 16) return false; + dp_link_encoding = data[DP_MAIN_LINK_CHANNEL_CODING_SET - DP_LINK_BW_SET]; + link_bw_set = data[DP_LINK_BW_SET - DP_LINK_BW_SET]; + lane_count.raw = data[DP_LANE_COUNT_SET - DP_LINK_BW_SET]; + + drm_dbg_dp(aux->drm_dev, "MST_DSC downlink setting: %d, 0x%x x %d\n", + dp_link_encoding, link_bw_set, lane_count.bits.LANE_COUNT_SET); + switch (dp_link_encoding) { case DP_8b_10b_ENCODING: link_rate = link_bw_set; @@ -1866,8 +1912,10 @@ enum dc_status dm_dp_mst_is_port_support_mode( end_link_bw = aconnector->mst_local_bw; } - if (end_link_bw > 0 && stream_kbps > end_link_bw) { - 
DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." + if (end_link_bw > 0 && + stream_kbps > end_link_bw && + aconnector->branch_ieee_oui != DP_BRANCH_DEVICE_ID_90CC24) { + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link. " "Mode required bw can't fit into last link\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 600d6e221011..6f7ea684b555 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2012-15 Advanced Micro Devices, Inc. * @@ -59,7 +60,7 @@ enum mst_msg_ready_type { struct amdgpu_display_manager; struct amdgpu_dm_connector; -int dm_mst_get_pbn_divider(struct dc_link *link); +uint32_t dm_mst_get_pbn_divider(struct dc_link *link); void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 848c5b4bb301..e5771f490f2e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index ff7b867ae98b..fd491b7a3cd7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2021 Advanced Micro Devices, Inc. * @@ -26,7 +27,6 @@ #include "amdgpu_dm_psr.h" #include "dc_dmub_srv.h" #include "dc.h" -#include "dm_helpers.h" #include "amdgpu_dm.h" #include "modules/power/power_helpers.h" diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h index e2366321a3c1..4fb8626913cf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2021 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c index 41f07f13a7b5..82ea3fe5e764 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2023 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h index 8126bdb1eb6b..73b6c67ae5e7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2021 Advanced Micro Devices, Inc. 
* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 0005f5f8f34f..132de4071efd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -1,3 +1,4 @@ +//SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h index 95f890fda8aa..aa56fd6d56c3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -1,3 +1,4 @@ +//SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 3c9ecea7eebc..dc943abd6dba 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -37,6 +37,7 @@ DC_LIBS += dcn301 DC_LIBS += dcn31 DC_LIBS += dml DC_LIBS += dml2 +DC_LIBS += soc_and_ip_translator endif DC_LIBS += dce120 diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c index d897f8a30ede..4da5adab799c 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c @@ -1136,7 +1136,7 @@ static void calculate_bandwidth( } } } - data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed(vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1)); + data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed((uint64_t)vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1)); data->total_dmifmc_urgent_latency = bw_mul(vbios->dmifmc_urgent_latency, data->total_dmifmc_urgent_trips); data->total_display_reads_required_data = bw_int_to_fixed(0); data->total_display_reads_required_dram_access_data = bw_int_to_fixed(0); diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 452206b5095e..6073cadde76c 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -284,7 +284,7 @@ struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) dc_fixpt_mul( square, res), - n * (n - 1))); + (long long)n * (n - 1))); n -= 2; } while (n != 0); diff --git a/drivers/gpu/drm/amd/display/dc/basics/vector.c b/drivers/gpu/drm/amd/display/dc/basics/vector.c index 6d2924114a3e..b413a672c2c0 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/vector.c +++ b/drivers/gpu/drm/amd/display/dc/basics/vector.c @@ -170,7 +170,7 @@ bool dal_vector_remove_at_index( memmove( vector->container + (index * vector->struct_size), vector->container + ((index + 1) * vector->struct_size), - (vector->count - index - 1) * vector->struct_size); + (size_t)(vector->count - index - 1) * vector->struct_size); vector->count -= 1; return true; @@ -219,7 +219,7 @@ bool dal_vector_insert_at( memmove( insert_address + vector->struct_size, insert_address, - vector->struct_size * (vector->count - position)); + (size_t)vector->struct_size * (vector->count - position)); memmove( 
insert_address, @@ -271,7 +271,7 @@ struct vector *dal_vector_clone( /* copy vector's data */ memmove(vec_cloned->container, vector->container, - vec_cloned->struct_size * vec_cloned->capacity); + (size_t)vec_cloned->struct_size * vec_cloned->capacity); return vec_cloned; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 67f08495b7e6..154fd2c18e88 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -174,11 +174,8 @@ static struct graphics_object_id bios_parser_get_connector_id( return object_id; } - if (tbl->ucNumberOfObjects <= i) { - dm_error("Can't find connector id %d in connector table of size %d.\n", - i, tbl->ucNumberOfObjects); + if (tbl->ucNumberOfObjects <= i) return object_id; - } id = le16_to_cpu(tbl->asObjects[i].usObjectID); object_id = object_id_from_bios_object_id(id); diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 2bcae0643e61..58e88778da7f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -993,7 +993,7 @@ static enum bp_result set_pixel_clock_v3( allocation.sPCLKInput.usFbDiv = cpu_to_le16((uint16_t)bp_params->feedback_divider); allocation.sPCLKInput.ucFracFbDiv = - (uint8_t)bp_params->fractional_feedback_divider; + (uint8_t)(bp_params->fractional_feedback_divider / 100000); allocation.sPCLKInput.ucPostDiv = (uint8_t)bp_params->pixel_clock_post_divider; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index f5ad0a177038..dbd6ef1b60a0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -72,9 +72,9 @@ static const struct state_dependent_clocks dce80_max_clks_by_state[] = { /* ClocksStateLow */ { .display_clk_khz = 352000, .pixel_clk_khz = 330000}, /* ClocksStateNominal */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 }, +{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 }, /* ClocksStatePerformance */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } }; +{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 } }; int dentist_get_divider_from_did(int did) { @@ -391,8 +391,6 @@ static void dce_pplib_apply_display_requirements( { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) @@ -405,11 +403,9 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr_base, { struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; - - /*TODO: W/A for dal3 linux, investigate why this works */ - if (!clk_mgr_dce->dfs_bypass_active) - patched_disp_clk = patched_disp_clk * 115 / 100; + const int max_disp_clk = + clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz); level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context); /* get max clock state from PPLIB */ diff --git 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index f8409453434c..13cf415e38e5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -120,9 +120,15 @@ void dce110_fill_display_configs( const struct dc_state *context, struct dm_pp_display_configuration *pp_display_cfg) { + struct dc *dc = context->clk_mgr->ctx->dc; int j; int num_cfgs = 0; + pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); + pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz; + pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; + pp_display_cfg->crtc_index = dc->res_pool->res_cap->num_timing_generator; + for (j = 0; j < context->stream_count; j++) { int k; @@ -164,6 +170,23 @@ void dce110_fill_display_configs( cfg->v_refresh /= stream->timing.h_total; cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2) / stream->timing.v_total; + + /* Find first CRTC index and calculate its line time. + * This is necessary for DPM on SI GPUs. + */ + if (cfg->pipe_idx < pp_display_cfg->crtc_index) { + const struct dc_crtc_timing *timing = + &context->streams[0]->timing; + + pp_display_cfg->crtc_index = cfg->pipe_idx; + pp_display_cfg->line_time_in_us = + timing->h_total * 10000 / timing->pix_clk_100hz; + } + } + + if (!num_cfgs) { + pp_display_cfg->crtc_index = 0; + pp_display_cfg->line_time_in_us = 0; } pp_display_cfg->display_count = num_cfgs; @@ -223,25 +246,8 @@ void dce11_pplib_apply_display_requirements( pp_display_cfg->min_engine_clock_deep_sleep_khz = context->bw_ctx.bw.dce.sclk_deep_sleep_khz; - pp_display_cfg->avail_mclk_switch_time_us = - dce110_get_min_vblank_time_us(context); - /* TODO: dce11.2*/ - pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; - - pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz; - dce110_fill_display_configs(context, pp_display_cfg); - /* TODO: is this still applicable?*/ - if (pp_display_cfg->display_count == 1) { - const struct dc_crtc_timing *timing = - &context->streams[0]->timing; - - pp_display_cfg->crtc_index = - pp_display_cfg->disp_configs[0].pipe_idx; - pp_display_cfg->line_time_in_us = timing->h_total * 10000 / timing->pix_clk_100hz; - } - if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index 0267644717b2..a39641a0ff09 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -83,22 +83,13 @@ static const struct state_dependent_clocks dce60_max_clks_by_state[] = { static int dce60_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - int dprefclk_wdivider; - int dp_ref_clk_khz; - int target_div; + struct dc_context *ctx = clk_mgr_base->ctx; + int dp_ref_clk_khz = 0; - /* DCE6 has no DPREFCLK_CNTL to read DP Reference Clock source */ - - /* Read the mmDENTIST_DISPCLK_CNTL to get the currently - * programmed DID DENTIST_DPREFCLK_WDIVIDER*/ - REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider); - - /* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/ - target_div = dentist_get_divider_from_did(dprefclk_wdivider); - 
- /* Calculate the current DFS clock, in kHz.*/ - dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; + if (ASIC_REV_IS_TAHITI_P(ctx->asic_id.hw_internal_rev)) + dp_ref_clk_khz = ctx->dc_bios->fw_info.default_display_engine_pll_frequency; + else + dp_ref_clk_khz = clk_mgr_base->clks.dispclk_khz; return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz); } @@ -109,8 +100,6 @@ static void dce60_pplib_apply_display_requirements( { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) @@ -123,11 +112,9 @@ static void dce60_update_clocks(struct clk_mgr *clk_mgr_base, { struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; - - /*TODO: W/A for dal3 linux, investigate why this works */ - if (!clk_mgr_dce->dfs_bypass_active) - patched_disp_clk = patched_disp_clk * 115 / 100; + const int max_disp_clk = + clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz); level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context); /* get max clock state from PPLIB */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index 9e2ef0e724fc..7aee02d56292 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -563,6 +563,7 @@ static void vg_clk_mgr_helper_populate_bw_params( { int i, j; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; + uint32_t max_dispclk = 0, max_dppclk = 0; j = -1; @@ -584,6 +585,15 @@ static void vg_clk_mgr_helper_populate_bw_params( return; } + /* dispclk and dppclk can be max at any voltage, same number of levels for both */ + if (clock_table->NumDispClkLevelsEnabled <= VG_NUM_DISPCLK_DPM_LEVELS && + clock_table->NumDispClkLevelsEnabled <= VG_NUM_DPPCLK_DPM_LEVELS) { + max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); + max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); + } else { + ASSERT(0); + } + bw_params->clk_table.num_entries = j + 1; for (i = 0; i < bw_params->clk_table.num_entries - 1; i++, j--) { @@ -591,11 +601,17 @@ static void vg_clk_mgr_helper_populate_bw_params( bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk; bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage; bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->DfPstateTable[j].voltage); + + /* Now update clocks we do read */ + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; } bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].fclk; bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk; bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage; bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, 
VG_NUM_DCFCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, VG_NUM_DISPCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, VG_NUM_DPPCLK_DPM_LEVELS); bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 084994c650c4..8376e2b0e73d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -1047,11 +1047,8 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) &num_entries_per_clk->num_fclk_levels); clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); - if (num_entries_per_clk->num_memclk_levels >= num_entries_per_clk->num_fclk_levels) { - num_levels = num_entries_per_clk->num_memclk_levels; - } else { - num_levels = num_entries_per_clk->num_fclk_levels; - } + num_levels = max(num_entries_per_clk->num_memclk_levels, num_entries_per_clk->num_fclk_levels); + clk_mgr_base->bw_params->max_memclk_mhz = clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz; clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index b59703467128..47ff4c965d76 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -1404,11 +1404,7 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_fclk_levels - 1].fclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = 0; - if (num_entries_per_clk->num_memclk_levels >= num_entries_per_clk->num_fclk_levels) { - num_levels = num_entries_per_clk->num_memclk_levels; - } else { - num_levels = num_entries_per_clk->num_fclk_levels; - } + num_levels = max(num_entries_per_clk->num_memclk_levels, num_entries_per_clk->num_fclk_levels); clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9ab0ee20ca6f..5963019d1e74 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -84,6 +84,7 @@ #if defined(CONFIG_DRM_AMD_DC_FP) #include "dml2/dml2_internal_types.h" +#include "soc_and_ip_translator.h" #endif #include "dce/dmub_outbox.h" @@ -217,11 +218,24 @@ static bool create_links( connectors_num, num_virtual_links); - // condition loop on link_count to allow skipping invalid indices + /* When getting the number of connectors, the VBIOS reports the number of valid indices, + * but it doesn't say which indices are valid, and not every index has an actual connector. + * So, if we don't find a connector on an index, that is not an error. + * + * - There is no guarantee that the first N indices will be valid + * - VBIOS may report a higher amount of valid indices than there are actual connectors + * - Some VBIOS have valid configurations for more connectors than there actually are + * on the card. 
This may be because the manufacturer used the same VBIOS for different + * variants of the same card. + */ for (i = 0; dc->link_count < connectors_num && i < MAX_LINKS; i++) { + struct graphics_object_id connector_id = bios->funcs->get_connector_id(bios, i); struct link_init_data link_init_params = {0}; struct dc_link *link; + if (connector_id.id == CONNECTOR_ID_UNKNOWN) + continue; + DC_LOG_DC("BIOS object table - printing link object info for connector number: %d, link_index: %d", i, dc->link_count); link_init_params.ctx = dc->ctx; @@ -446,7 +460,9 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * avoid conflicting with firmware updates. */ if (dc->ctx->dce_version > DCE_VERSION_MAX) { - if (dc->optimized_required || dc->wm_optimized_required) { + if ((dc->optimized_required || dc->wm_optimized_required) && + (stream->adjust.v_total_max != adjust->v_total_max || + stream->adjust.v_total_min != adjust->v_total_min)) { stream->adjust.timing_adjust_pending = true; return false; } @@ -934,7 +950,9 @@ static void dc_destruct(struct dc *dc) } dc_destroy_resource_pool(dc); - +#ifdef CONFIG_DRM_AMD_DC_FP + dc_destroy_soc_and_ip_translator(&dc->soc_and_ip_translator); +#endif if (dc->link_srv) link_destroy_link_service(&dc->link_srv); @@ -1138,6 +1156,9 @@ static bool dc_construct(struct dc *dc, dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); DC_FP_END(); } + dc->soc_and_ip_translator = dc_create_soc_and_ip_translator(dc_ctx->dce_version); + if (!dc->soc_and_ip_translator) + goto fail; #endif if (!create_links(dc, init_params->num_virtual_links)) @@ -2398,6 +2419,18 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params goto fail; } + /* + * If not already seamless, make transition seamless by inserting intermediate minimal transition + */ + if (dc->hwss.is_pipe_topology_transition_seamless && + !dc->hwss.is_pipe_topology_transition_seamless(dc, dc->current_state, context)) { + res = commit_minimal_transition_state(dc, context); + if (res != DC_OK) { + BREAK_TO_DEBUGGER(); + goto fail; + } + } + res = dc_commit_state_no_check(dc, context); for (i = 0; i < params->stream_count; i++) { @@ -3377,7 +3410,7 @@ static void update_seamless_boot_flags(struct dc *dc, int surface_count, struct dc_stream_state *stream) { - if (get_seamless_boot_stream_count(context) > 0 && surface_count > 0) { + if (get_seamless_boot_stream_count(context) > 0 && (surface_count > 0 || stream->dpms_off)) { /* Optimize seamless boot flag keeps clocks and watermarks high until * first flip. After first flip, optimization is required to lower * bandwidth. 
Important to note that it is expected UEFI will @@ -4136,7 +4169,7 @@ static void commit_planes_for_stream(struct dc *dc, } if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) - dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type == UPDATE_TYPE_FAST); + dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type < UPDATE_TYPE_FULL); if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) @@ -5089,129 +5122,6 @@ static bool fast_update_only(struct dc *dc, && !full_update_required(dc, srf_updates, surface_count, stream_update, stream); } -static bool update_planes_and_stream_v1(struct dc *dc, - struct dc_surface_update *srf_updates, int surface_count, - struct dc_stream_state *stream, - struct dc_stream_update *stream_update, - struct dc_state *state) -{ - const struct dc_stream_status *stream_status; - enum surface_update_type update_type; - struct dc_state *context; - struct dc_context *dc_ctx = dc->ctx; - int i, j; - struct dc_fast_update fast_update[MAX_SURFACES] = {0}; - - dc_exit_ips_for_hw_access(dc); - - populate_fast_updates(fast_update, srf_updates, surface_count, stream_update); - stream_status = dc_stream_get_status(stream); - context = dc->current_state; - - update_type = dc_check_update_surfaces_for_stream( - dc, srf_updates, surface_count, stream_update, stream_status); - /* It is possible to receive a flip for one plane while there are multiple flip_immediate planes in the same stream. - * E.g. Desktop and MPO plane are flip_immediate but only the MPO plane received a flip - * Force the other flip_immediate planes to flip so GSL doesn't wait for a flip that won't come. - */ - force_immediate_gsl_plane_flip(dc, srf_updates, surface_count); - - if (update_type >= UPDATE_TYPE_FULL) { - - /* initialize scratch memory for building context */ - context = dc_state_create_copy(state); - if (context == NULL) { - DC_ERROR("Failed to allocate new validate context!\n"); - return false; - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - - if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) - new_pipe->plane_state->force_full_update = true; - } - } else if (update_type == UPDATE_TYPE_FAST) { - /* - * Previous frame finished and HW is ready for optimization. 
- */ - dc_post_update_surfaces_to_stream(dc); - } - - for (i = 0; i < surface_count; i++) { - struct dc_plane_state *surface = srf_updates[i].surface; - - copy_surface_update_to_plane(surface, &srf_updates[i]); - - if (update_type >= UPDATE_TYPE_MED) { - for (j = 0; j < dc->res_pool->pipe_count; j++) { - struct pipe_ctx *pipe_ctx = - &context->res_ctx.pipe_ctx[j]; - - if (pipe_ctx->plane_state != surface) - continue; - - resource_build_scaling_params(pipe_ctx); - } - } - } - - copy_stream_update_to_stream(dc, context, stream, stream_update); - - if (update_type >= UPDATE_TYPE_FULL) { - if (dc->res_pool->funcs->validate_bandwidth(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING) != DC_OK) { - DC_ERROR("Mode validation failed for stream update!\n"); - dc_state_release(context); - return false; - } - } - - TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES); - - if (fast_update_only(dc, fast_update, srf_updates, surface_count, stream_update, stream) && - !dc->debug.enable_legacy_fast_update) { - commit_planes_for_stream_fast(dc, - srf_updates, - surface_count, - stream, - stream_update, - update_type, - context); - } else { - commit_planes_for_stream( - dc, - srf_updates, - surface_count, - stream, - stream_update, - update_type, - context); - } - /*update current_State*/ - if (dc->current_state != context) { - - struct dc_state *old = dc->current_state; - - dc->current_state = context; - dc_state_release(old); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->plane_state && pipe_ctx->stream == stream) - pipe_ctx->plane_state->force_full_update = false; - } - } - - /* Legacy optimization path for DCE. */ - if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < DCE_VERSION_MAX) { - dc_post_update_surfaces_to_stream(dc); - TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce); - } - return true; -} - static bool update_planes_and_stream_v2(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, @@ -5469,12 +5379,10 @@ void dc_commit_updates_for_stream(struct dc *dc, if (dc->ctx->dce_version >= DCN_VERSION_4_01) { ret = update_planes_and_stream_v3(dc, srf_updates, surface_count, stream, stream_update); - } else if (dc->ctx->dce_version >= DCN_VERSION_3_2) { + } else { ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - } else - ret = update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, - stream_update, state); + } if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2) clear_update_flags(srf_updates, surface_count, stream); @@ -6450,3 +6358,21 @@ bool dc_can_clear_cursor_limit(struct dc *dc) return false; } + +void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, + struct dc_underflow_debug_data *out_data) +{ + struct timing_generator *tg = NULL; + + for (int i = 0; i < MAX_PIPES; i++) { + if (dc->res_pool->timing_generators[i] && + dc->res_pool->timing_generators[i]->inst == primary_otg_inst) { + tg = dc->res_pool->timing_generators[i]; + break; + } + } + + dc_exit_ips_for_hw_access(dc); + if (dc->hwss.get_underflow_debug_data) + dc->hwss.get_underflow_debug_data(dc, tg, out_data); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index ec4e80e5b6eb..d82b1cb467f4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -1177,6 +1177,8 @@ void 
hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *c tg = otg_master->stream_res.tg; if (tg->funcs->wait_odm_doublebuffer_pending_clear) tg->funcs->wait_odm_doublebuffer_pending_clear(tg); + if (tg->funcs->wait_otg_disable) + tg->funcs->wait_otg_disable(tg); } /* ODM update may require to reprogram blank pattern for each OPP */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index 130455f2802a..b7a5de4ecb61 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -520,3 +520,10 @@ enum dc_status dc_link_validate_dp_tunneling_bandwidth(const struct dc *dc, cons return dc->link_srv->validate_dp_tunnel_bandwidth(dc, new_ctx); } +void dc_link_get_alpm_support(struct dc_link *link, + bool *auxless_support, + bool *auxwake_support) +{ + link->dc->link_srv->edp_get_alpm_support(link, auxless_support, auxwake_support); +} + diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4d6181e7c612..d712548b1927 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -165,7 +165,13 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) case FAMILY_NV: dc_version = DCN_VERSION_2_0; - if (asic_id.chip_id == DEVICE_ID_NV_13FE || asic_id.chip_id == DEVICE_ID_NV_143F) { + if (asic_id.chip_id == DEVICE_ID_NV_13FE || + asic_id.chip_id == DEVICE_ID_NV_143F || + asic_id.chip_id == DEVICE_ID_NV_13F9 || + asic_id.chip_id == DEVICE_ID_NV_13FA || + asic_id.chip_id == DEVICE_ID_NV_13FB || + asic_id.chip_id == DEVICE_ID_NV_13FC || + asic_id.chip_id == DEVICE_ID_NV_13DB) { dc_version = DCN_VERSION_2_01; break; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c index fe9f99f1bdf9..f976ffd6d466 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c @@ -65,7 +65,7 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION || notify->type == DMUB_NOTIFICATION_SET_CONFIG_REPLY) { notify->link_index = - get_link_index_from_dpia_port_index(dc, notify->link_index); + get_link_index_from_dpia_port_index(dc, notify->instance); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 4d6bc9fd4faa..9ac2d41f8fca 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -316,6 +316,9 @@ bool dc_stream_set_cursor_attributes( { bool result = false; + if (!stream) + return false; + if (dc_stream_check_cursor_attributes(stream, stream->ctx->dc->current_state, attributes)) { stream->cursor_attributes = *attributes; result = true; @@ -331,7 +334,10 @@ bool dc_stream_program_cursor_attributes( struct dc *dc; bool reset_idle_optimizations = false; - dc = stream ? 
stream->ctx->dc : NULL; + if (!stream) + return false; + + dc = stream->ctx->dc; if (dc_stream_set_cursor_attributes(stream, attributes)) { dc_z10_restore(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 59c07756130d..954a2786fbe2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.340" +#define DC_VER "3.2.349" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC @@ -234,6 +234,7 @@ struct lut3d_caps { * @ogam_ram: programmable out gamma LUT * @ocsc: output color space conversion matrix * @num_3dluts: MPC 3D LUT; always assumes a preceding shaper LUT + * @num_rmcm_3dluts: number of RMCM 3D LUTS; always assumes a preceding shaper LUT * @shared_3d_lut: shared 3D LUT flag. Can be either DPP or MPC, but single * instance * @ogam_rom_caps: pre-definied curve caps for regamma 1D LUT @@ -694,6 +695,15 @@ struct dc_clocks { int idle_fclk_khz; int subvp_prefetch_dramclk_khz; int subvp_prefetch_fclk_khz; + + /* Stutter efficiency is technically not clock values + * but stored here so the values are part of the update_clocks call similar to num_ways + * Efficiencies are stored as percentage (0-100) + */ + struct { + uint8_t base_efficiency; //LP1 + uint8_t low_power_efficiency; //LP2 + } stutter_efficiency; }; struct dc_bw_validation_profile { @@ -839,7 +849,8 @@ union dpia_debug_options { uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ uint32_t disable_usb4_pm_support:1; /* bit 5 */ uint32_t enable_usb4_bw_zero_alloc_patch:1; /* bit 6 */ - uint32_t reserved:25; + uint32_t enable_bw_allocation_mode:1; /* bit 7 */ + uint32_t reserved:24; } bits; uint32_t raw; }; @@ -1072,6 +1083,7 @@ struct dc_debug_options { unsigned int force_mall_ss_num_ways; bool alloc_extra_way_for_cursor; uint32_t subvp_extra_lines; + bool disable_force_pstate_allow_on_hw_release; bool force_usr_allow; /* uses value at boot and disables switch */ bool disable_dtb_ref_clk_switch; @@ -1145,6 +1157,12 @@ struct dc_debug_options { bool enable_hblank_borrow; bool force_subvp_df_throttle; uint32_t acpi_transition_bitmasks[MAX_PIPES]; + bool enable_pg_cntl_debug_logs; + unsigned int auxless_alpm_lfps_setup_ns; + unsigned int auxless_alpm_lfps_period_ns; + unsigned int auxless_alpm_lfps_silence_ns; + unsigned int auxless_alpm_lfps_t1t2_us; + short auxless_alpm_lfps_t1t2_offset_us; }; @@ -1305,6 +1323,32 @@ union dc_3dlut_state { }; +#define MATRIX_9C__DIM_128_ALIGNED_LEN 16 // 9+8 : 9 * 8 + 7 * 8 = 72 + 56 = 128 % 128 = 0 +#define MATRIX_17C__DIM_128_ALIGNED_LEN 32 //17+15: 17 * 8 + 15 * 8 = 136 + 120 = 256 % 128 = 0 +#define MATRIX_33C__DIM_128_ALIGNED_LEN 64 //17+47: 17 * 8 + 47 * 8 = 136 + 376 = 512 % 128 = 0 + +struct lut_rgb { + uint16_t b; + uint16_t g; + uint16_t r; + uint16_t padding; +}; + +//this structure maps directly to how the lut will read it from memory +struct lut_mem_mapping { + union { + //NATIVE MODE 1, 2 + //RGB layout [b][g][r] //red is 128 byte aligned + //BGR layout [r][g][b] //blue is 128 byte aligned + struct lut_rgb rgb_17c[17][17][MATRIX_17C__DIM_128_ALIGNED_LEN]; + struct lut_rgb rgb_33c[33][33][MATRIX_33C__DIM_128_ALIGNED_LEN]; + + //TRANSFORMED + uint16_t linear_rgb[(33*33*33*4/128+1)*128]; + }; + uint16_t size; +}; + struct dc_rmcm_3dlut { bool isInUse; const struct dc_stream_state *stream; @@ -1733,7 +1777,7 @@ struct dc { struct 
dml2_configuration_options dml2_options; struct dml2_configuration_options dml2_dc_power_options; enum dc_acpi_cm_power_state power_state; - + struct soc_and_ip_translator *soc_and_ip_translator; }; struct dc_scaling_info { @@ -1786,6 +1830,23 @@ struct dc_surface_update { struct dc_bias_and_scale bias_and_scale; }; +struct dc_underflow_debug_data { + uint32_t otg_inst; + uint32_t otg_underflow; + uint32_t h_position; + uint32_t v_position; + uint32_t otg_frame_count; + struct dc_underflow_per_hubp_debug_data { + uint32_t hubp_underflow; + uint32_t hubp_in_blank; + uint32_t hubp_readline; + uint32_t det_config_error; + } hubps[MAX_PIPES]; + uint32_t curr_det_sizes[MAX_PIPES]; + uint32_t target_det_sizes[MAX_PIPES]; + uint32_t compbuf_config_error; +}; + /* * Create a new surface with default parameters; */ @@ -1804,8 +1865,6 @@ void dc_3dlut_func_retain(struct dc_3dlut *lut); void dc_post_update_surfaces_to_stream( struct dc *dc); -#include "dc_stream.h" - /** * struct dc_validation_set - Struct to store surface/stream associations for validation */ @@ -2447,6 +2506,12 @@ void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( */ enum dc_status dc_link_validate_dp_tunneling_bandwidth(const struct dc *dc, const struct dc_state *new_ctx); +/* + * Get if ALPM is supported by the link + */ +void dc_link_get_alpm_support(struct dc_link *link, bool *auxless_support, + bool *auxwake_support); + /* Sink Interfaces - A sink corresponds to a display output device */ struct dc_container_id { @@ -2674,4 +2739,17 @@ bool dc_is_timing_changed(struct dc_stream_state *cur_stream, bool dc_is_cursor_limit_pending(struct dc *dc); bool dc_can_clear_cursor_limit(struct dc *dc); +/** + * dc_get_underflow_debug_data_for_otg() - Retrieve underflow debug data. + * + * @dc: Pointer to the display core context. + * @primary_otg_inst: Instance index of the primary OTG that underflowed. + * @out_data: Pointer to a dc_underflow_debug_data struct to be filled with debug information. + * + * This function collects and logs underflow-related HW states when underflow happens, + * including OTG underflow status, current read positions, frame count, and per-HUBP debug data. + * The results are stored in the provided out_data structure for further analysis or logging. 
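+ * + * Illustrative use (a hypothetical call site, not part of this change; assumes a valid + * dc pointer and the OTG instance that signalled the underflow): + * + * struct dc_underflow_debug_data data = {0}; + * + * dc_get_underflow_debug_data_for_otg(dc, otg_inst, &data); + * if (data.otg_underflow) + * DC_LOG_DEBUG("OTG%u underflow at frame %u", data.otg_inst, data.otg_frame_count);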
+ */ +void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, struct dc_underflow_debug_data *out_data); + #endif /* DC_INTERFACE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index f5ef1a07078e..53a088ebddef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -2010,11 +2010,12 @@ bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv) return result; } -bool dmub_lsdma_send_linear_copy_packet( +bool dmub_lsdma_send_linear_copy_command( struct dc_dmub_srv *dc_dmub_srv, uint64_t src_addr, uint64_t dst_addr, - uint32_t count) + uint32_t count +) { struct dc_context *dc_ctx = dc_dmub_srv->ctx; union dmub_rb_cmd cmd; @@ -2042,9 +2043,54 @@ bool dmub_lsdma_send_linear_copy_packet( return result; } +bool dmub_lsdma_send_linear_sub_window_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + struct lsdma_linear_sub_window_copy_params copy_data +) +{ + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + union dmub_rb_cmd cmd; + enum dm_dmub_wait_type wait_type; + struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; + bool result; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.cmd_common.header.type = DMUB_CMD__LSDMA; + cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_LINEAR_SUB_WINDOW_COPY; + wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; + + lsdma_data->u.linear_sub_window_copy_data.tmz = copy_data.tmz; + lsdma_data->u.linear_sub_window_copy_data.element_size = copy_data.element_size; + lsdma_data->u.linear_sub_window_copy_data.src_lo = copy_data.src_lo; + lsdma_data->u.linear_sub_window_copy_data.src_hi = copy_data.src_hi; + lsdma_data->u.linear_sub_window_copy_data.src_x = copy_data.src_x; + lsdma_data->u.linear_sub_window_copy_data.src_y = copy_data.src_y; + lsdma_data->u.linear_sub_window_copy_data.src_pitch = copy_data.src_pitch; + lsdma_data->u.linear_sub_window_copy_data.src_slice_pitch = copy_data.src_slice_pitch; + lsdma_data->u.linear_sub_window_copy_data.dst_lo = copy_data.dst_lo; + lsdma_data->u.linear_sub_window_copy_data.dst_hi = copy_data.dst_hi; + lsdma_data->u.linear_sub_window_copy_data.dst_x = copy_data.dst_x; + lsdma_data->u.linear_sub_window_copy_data.dst_y = copy_data.dst_y; + lsdma_data->u.linear_sub_window_copy_data.dst_pitch = copy_data.dst_pitch; + lsdma_data->u.linear_sub_window_copy_data.dst_slice_pitch = copy_data.dst_slice_pitch; + lsdma_data->u.linear_sub_window_copy_data.rect_x = copy_data.rect_x; + lsdma_data->u.linear_sub_window_copy_data.rect_y = copy_data.rect_y; + lsdma_data->u.linear_sub_window_copy_data.src_cache_policy = copy_data.src_cache_policy; + lsdma_data->u.linear_sub_window_copy_data.dst_cache_policy = copy_data.dst_cache_policy; + + result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type); + + if (!result) + DC_ERROR("LSDMA Linear Sub Window Copy failed in DMUB"); + + return result; +} + bool dmub_lsdma_send_tiled_to_tiled_copy_command( struct dc_dmub_srv *dc_dmub_srv, - struct lsdma_send_tiled_to_tiled_copy_command_params params) + struct lsdma_send_tiled_to_tiled_copy_command_params params +) { struct dc_context *dc_ctx = dc_dmub_srv->ctx; union dmub_rb_cmd cmd; @@ -2066,8 +2112,8 @@ bool dmub_lsdma_send_tiled_to_tiled_copy_command( lsdma_data->u.tiled_copy_data.src_y = params.src_y; lsdma_data->u.tiled_copy_data.dst_x = params.dst_x; lsdma_data->u.tiled_copy_data.dst_y = params.dst_y; - lsdma_data->u.tiled_copy_data.src_width = params.src_width - 1; // LSDMA controller expects width -1 - 
lsdma_data->u.tiled_copy_data.dst_width = params.dst_width - 1; // LSDMA controller expects width -1 + lsdma_data->u.tiled_copy_data.src_width = params.src_width; + lsdma_data->u.tiled_copy_data.dst_width = params.dst_width; lsdma_data->u.tiled_copy_data.src_swizzle_mode = params.swizzle_mode; lsdma_data->u.tiled_copy_data.dst_swizzle_mode = params.swizzle_mode; lsdma_data->u.tiled_copy_data.src_element_size = params.element_size; @@ -2078,8 +2124,8 @@ bool dmub_lsdma_send_tiled_to_tiled_copy_command( lsdma_data->u.tiled_copy_data.tmz = params.tmz; lsdma_data->u.tiled_copy_data.read_compress = params.read_compress; lsdma_data->u.tiled_copy_data.write_compress = params.write_compress; - lsdma_data->u.tiled_copy_data.src_height = params.src_height - 1; // LSDMA controller expects height -1 - lsdma_data->u.tiled_copy_data.dst_height = params.dst_height - 1; // LSDMA controller expects height -1 + lsdma_data->u.tiled_copy_data.src_height = params.src_height; + lsdma_data->u.tiled_copy_data.dst_height = params.dst_height; lsdma_data->u.tiled_copy_data.data_format = params.data_format; lsdma_data->u.tiled_copy_data.max_com = params.max_com; lsdma_data->u.tiled_copy_data.max_uncom = params.max_uncom; @@ -2097,7 +2143,8 @@ bool dmub_lsdma_send_pio_copy_command( uint64_t src_addr, uint64_t dst_addr, uint32_t byte_count, - uint32_t overlap_disable) + uint32_t overlap_disable +) { struct dc_context *dc_ctx = dc_dmub_srv->ctx; union dmub_rb_cmd cmd; @@ -2130,7 +2177,8 @@ bool dmub_lsdma_send_pio_constfill_command( struct dc_dmub_srv *dc_dmub_srv, uint64_t dst_addr, uint32_t byte_count, - uint32_t data) + uint32_t data +) { struct dc_context *dc_ctx = dc_dmub_srv->ctx; union dmub_rb_cmd cmd; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 8ea320f21269..7ef93444ef3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -211,11 +211,45 @@ void dc_dmub_srv_fams2_passthrough_flip( int surface_count); bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv); -bool dmub_lsdma_send_linear_copy_packet( +bool dmub_lsdma_send_linear_copy_command( struct dc_dmub_srv *dc_dmub_srv, uint64_t src_addr, uint64_t dst_addr, uint32_t count); + +struct lsdma_linear_sub_window_copy_params { + uint32_t src_lo; + uint32_t src_hi; + + uint32_t dst_lo; + uint32_t dst_hi; + + uint32_t src_x : 16; + uint32_t src_y : 16; + + uint32_t dst_x : 16; + uint32_t dst_y : 16; + + uint32_t rect_x : 16; + uint32_t rect_y : 16; + + uint32_t src_pitch : 16; + uint32_t dst_pitch : 16; + + uint32_t src_slice_pitch; + uint32_t dst_slice_pitch; + + uint32_t tmz : 1; + uint32_t element_size : 3; + uint32_t src_cache_policy : 3; + uint32_t dst_cache_policy : 3; + uint32_t padding : 22; +}; + +bool dmub_lsdma_send_linear_sub_window_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + struct lsdma_linear_sub_window_copy_params copy_data +); bool dmub_lsdma_send_pio_copy_command( struct dc_dmub_srv *dc_dmub_srv, uint64_t src_addr, diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 5ce1be362534..db669ccb1d58 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -1021,7 +1021,8 @@ union dp_128b_132b_supported_lttpr_link_rates { union dp_alpm_lttpr_cap { struct { uint8_t AUX_LESS_ALPM_SUPPORTED :1; - uint8_t RESERVED :7; + uint8_t ASSR_SUPPORTED :1; + uint8_t RESERVED :6; } bits; uint8_t raw; }; @@ -1119,10 +1120,11 @@ 
union dp_128b_132b_training_aux_rd_interval { union edp_alpm_caps { struct { - uint8_t AUX_WAKE_ALPM_CAP :1; - uint8_t PM_STATE_2A_SUPPORT :1; - uint8_t AUX_LESS_ALPM_CAP :1; - uint8_t RESERVED :5; + uint8_t AUX_WAKE_ALPM_CAP :1; + uint8_t PM_STATE_2A_SUPPORT :1; + uint8_t AUX_LESS_ALPM_CAP :1; + uint8_t AUX_LESS_ALPM_ML_PHY_SLEEP_STATUS_SUPPORTED :1; + uint8_t RESERVED :4; } bits; uint8_t raw; }; @@ -1282,6 +1284,7 @@ struct dpcd_caps { union dp_receive_port0_cap receive_port0_cap; /* Indicates the number of SST links supported by MSO (Multi-Stream Output) */ uint8_t mso_cap_sst_links_supported; + uint8_t dp_edp_general_cap_2; }; union dpcd_sink_ext_caps { @@ -1347,7 +1350,9 @@ union dpcd_alpm_configuration { struct { unsigned char ENABLE : 1; unsigned char IRQ_HPD_ENABLE : 1; - unsigned char RESERVED : 6; + unsigned char ALPM_MODE_SEL : 1; + unsigned char ACDS_PERIOD_DURATION : 1; + unsigned char RESERVED : 4; } bits; unsigned char raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 7217de258851..51e41aed7316 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -732,7 +732,7 @@ char *dce_version_to_string(const int version) case DCN_VERSION_3_03: return "DCN 3.0.3"; case DCN_VERSION_3_1: - return "DCN 3.1"; + return "DCN 3.1.2"; case DCN_VERSION_3_14: return "DCN 3.1.4"; case DCN_VERSION_3_15: diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 375ca2f13b7a..619834a328a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -563,6 +563,12 @@ struct dc_info_packet_128 { uint8_t sb[128]; }; +struct dc_edid_read_policy { + uint32_t max_retry_count; + uint32_t delay_time_ms; + uint32_t ignore_checksum; +}; + #define DC_PLANE_UPDATE_TIMES_MAX 10 struct dc_plane_flip_time { @@ -571,6 +577,12 @@ struct dc_plane_flip_time { unsigned int prev_update_time_in_us; }; +enum dc_alpm_mode { + DC_ALPM_AUXWAKE = 0, + DC_ALPM_AUXLESS = 1, + DC_ALPM_UNSUPPORTED = 0xF, +}; + enum dc_psr_state { PSR_STATE0 = 0x0, PSR_STATE1, @@ -616,6 +628,7 @@ struct psr_config { unsigned int line_time_in_us; uint8_t rate_control_caps; uint16_t dsc_slice_height; + bool os_request_force_ffu; }; union dmcu_psr_level { @@ -728,6 +741,7 @@ struct psr_context { unsigned int line_time_in_us; uint8_t rate_control_caps; uint16_t dsc_slice_height; + bool os_request_force_ffu; }; struct colorspace_transform { @@ -1137,6 +1151,10 @@ struct replay_config { bool low_rr_supported; /* Replay Video Conferencing Optimization Enabled */ bool replay_video_conferencing_optimization_enabled; + /* Replay alpm mode */ + enum dc_alpm_mode alpm_mode; + /* Replay full screen only */ + bool os_request_force_ffu; }; /* Replay feature flags*/ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 58c84f555c0f..0ce9489ac6b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -133,30 +133,34 @@ enum dsc_clk_source { }; -static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable) +static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool allow_rcg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && enable) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && allow_rcg) return; switch 
(inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); return; } + + /* Wait for clock to ramp */ + if (!allow_rcg) + udelay(10); } static void dccg35_set_symclk32_se_rcg( @@ -385,35 +389,34 @@ static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) } } -static void dccg35_set_dppclk_rcg(struct dccg *dccg, - int inst, bool enable) +static void dccg35_set_dppclk_rcg(struct dccg *dccg, int inst, bool allow_rcg) { - struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && allow_rcg) return; switch (inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); break; } - //DC_LOG_DEBUG("%s: inst(%d) DPPCLK rcg_disable: %d\n", __func__, inst, enable ? 
0 : 1); + /* Wait for clock to ramp */ + if (!allow_rcg) + udelay(10); } static void dccg35_set_dpstreamclk_rcg( @@ -1177,32 +1180,34 @@ static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst, } static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg, - uint32_t dpp_inst, uint32_t enable) + uint32_t dpp_inst, uint32_t disallow_rcg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && !disallow_rcg) return; switch (dpp_inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, disallow_rcg); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, disallow_rcg); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, disallow_rcg); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, disallow_rcg); break; default: break; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) rcg: %d\n", __func__, dpp_inst, enable); + /* Wait for clock to ramp */ + if (disallow_rcg) + udelay(10); } static void dccg35_get_pixel_rate_div( @@ -1782,8 +1787,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) //Disable DTO switch (inst) { case 0: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, 0, @@ -1791,8 +1795,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); break; case 1: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, 0, @@ -1800,8 +1803,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); break; case 2: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, 0, @@ -1809,8 +1811,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); break; case 3: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, 0, @@ -1821,6 +1822,9 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) BREAK_TO_DEBUGGER(); return; } + + /* Wait for clock to ramp */ + udelay(10); } static void dccg35_disable_dscclk(struct dccg *dccg, @@ -1864,6 +1868,9 @@ static void dccg35_disable_dscclk(struct dccg *dccg, default: return; } + + /* Wait for clock ramp */ + udelay(10); } static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) @@ -2349,10 +2356,7 @@ static void dccg35_disable_symclk_se_cb( void 
dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating) { - - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) { - dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); - } + dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); } static const struct dccg_funcs dccg35_funcs_new = { diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c index 668ee2d405fd..0b8ed9b94d3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c @@ -619,7 +619,7 @@ void dccg401_set_dp_dto( dto_integer = div_u64(params->pixclk_hz, dto_modulo_hz); dto_phase_hz = params->pixclk_hz - dto_integer * dto_modulo_hz; - if (dto_phase_hz <= 0) { + if (dto_phase_hz <= 0 && dto_integer <= 0) { /* negative pixel rate should never happen */ BREAK_TO_DEBUGGER(); return; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index bb4ac5042c80..673bb87d2c17 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -725,14 +725,18 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, for (i = 0; i < AUX_MAX_RETRIES; i++) { DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, LOG_FLAG_I2cAux_DceAux, - "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: address=0x%04x length=%u write=%d mot=%d", + "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: " + "address=0x%04x length=%u write=%d mot=%d is_i2c=%d is_dpia=%d ddc_hw_inst=%d", ddc && ddc->link ? ddc->link->link_index : UINT_MAX, i + 1, (int)AUX_MAX_RETRIES, payload->address, payload->length, (unsigned int) payload->write, - (unsigned int) payload->mot); + (unsigned int) payload->mot, + payload->i2c_over_aux, + (ddc->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? true : false, + ddc->link->ddc_hw_inst); if (payload->write) dce_aux_log_payload(" write", payload->data, payload->length, 16); @@ -746,7 +750,9 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, LOG_FLAG_I2cAux_DceAux, - "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u", + "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: " + "address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d " + "payload->reply=%u is_i2c=%d is_dpia=%d ddc_hw_inst=%d", ddc && ddc->link ? ddc->link->link_index : UINT_MAX, i + 1, (int)AUX_MAX_RETRIES, @@ -756,7 +762,10 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, (unsigned int) payload->mot, ret, (int)operation_result, - (unsigned int) *payload->reply); + (unsigned int) *payload->reply, + payload->i2c_over_aux, + (ddc->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? true : false, + ddc->link->ddc_hw_inst); if (!payload->write) dce_aux_log_payload(" read", payload->data, ret > 0 ? 
ret : 0, 16); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index 0421b267a0b5..365dd2e37aea 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -591,7 +591,7 @@ static bool dce_i2c_hw_engine_submit_payload(struct dce_i2c_hw *dce_i2c_hw, DCE_I2C_TRANSACTION_ACTION_I2C_WRITE; - request.address = (uint8_t) ((payload->address << 1) | !payload->write); + request.address = (uint8_t) ((payload->address << 1) | (payload->write ? 0 : 1)); request.length = payload->length; request.data = payload->data; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c index e188447c8156..2d73b94c515c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c @@ -451,7 +451,7 @@ static bool dce_i2c_sw_engine_submit_payload(struct dce_i2c_sw *engine, DCE_I2C_TRANSACTION_ACTION_I2C_WRITE_MOT : DCE_I2C_TRANSACTION_ACTION_I2C_WRITE; - request.address = (uint8_t) ((payload->address << 1) | !payload->write); + request.address = (uint8_t) ((payload->address << 1) | (payload->write ? 0 : 1)); request.length = payload->length; request.data = payload->data; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 4a9d07c31bc5..0c50fe266c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -896,13 +896,13 @@ void dce110_link_encoder_construct( enc110->base.id, &bp_cap_info); /* Override features with DCE-specific values */ - if (BP_RESULT_OK == result) { + if (result == BP_RESULT_OK) { enc110->base.features.flags.bits.IS_HBR2_CAPABLE = bp_cap_info.DP_HBR2_EN; enc110->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; - } else { + } else if (result != BP_RESULT_NORECORD) { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, result); @@ -1798,13 +1798,13 @@ void dce60_link_encoder_construct( enc110->base.id, &bp_cap_info); /* Override features with DCE-specific values */ - if (BP_RESULT_OK == result) { + if (result == BP_RESULT_OK) { enc110->base.features.flags.bits.IS_HBR2_CAPABLE = bp_cap_info.DP_HBR2_EN; enc110->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; - } else { + } else if (result != BP_RESULT_NORECORD) { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, result); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index ff3b8244ba3d..87af4fdc04a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -391,7 +391,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, sizeof(DP_SINK_DEVICE_STR_ID_1))) link->psr_settings.force_ffu_mode = 1; - copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode; + copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode || psr_context->os_request_force_ffu; if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE && !link->dc->debug.disable_fec) && diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 
e7a318e26d38..65b979617b0c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -3,8 +3,8 @@ // Copyright 2024 Advanced Micro Devices, Inc. #include "dc.h" +#include "link.h" #include "dc_dmub_srv.h" -#include "dc_dp_types.h" #include "dmub/dmub_srv.h" #include "core_types.h" #include "dmub_replay.h" @@ -44,45 +44,21 @@ static void dmub_replay_get_state(struct dmub_replay *dmub, enum replay_state *s /* * Enable/Disable Replay. */ -static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst, - struct dc_link *link) +static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; uint32_t retry_count; enum replay_state state = REPLAY_STATE_0; - struct pipe_ctx *pipe_ctx = NULL; - struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx; - uint8_t i; memset(&cmd, 0, sizeof(cmd)); cmd.replay_enable.header.type = DMUB_CMD__REPLAY; cmd.replay_enable.data.panel_inst = panel_inst; cmd.replay_enable.header.sub_type = DMUB_CMD__REPLAY_ENABLE; - if (enable) { + if (enable) cmd.replay_enable.data.enable = REPLAY_ENABLE; - // hpo stream/link encoder assignments are not static, need to update everytime we try to enable replay - if (link->cur_link_settings.link_rate >= LINK_RATE_UHBR10) { - for (i = 0; i < MAX_PIPES; i++) { - if (res_ctx && - res_ctx->pipe_ctx[i].stream && - res_ctx->pipe_ctx[i].stream->link && - res_ctx->pipe_ctx[i].stream->link == link && - res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) { - pipe_ctx = &res_ctx->pipe_ctx[i]; - //TODO: refactor for multi edp support - break; - } - } - - if (!pipe_ctx) - return; - - cmd.replay_enable.data.hpo_stream_enc_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - cmd.replay_enable.data.hpo_link_enc_inst = pipe_ctx->link_res.hpo_dp_link_enc->inst; - } - } else + else cmd.replay_enable.data.enable = REPLAY_DISABLE; cmd.replay_enable.header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_enable_data); @@ -174,17 +150,6 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->digbe_inst = replay_context->digbe_inst; copy_settings_data->digfe_inst = replay_context->digfe_inst; - if (link->cur_link_settings.link_rate >= LINK_RATE_UHBR10) { - if (pipe_ctx->stream_res.hpo_dp_stream_enc) - copy_settings_data->hpo_stream_enc_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - else - copy_settings_data->hpo_stream_enc_inst = 0; - if (pipe_ctx->link_res.hpo_dp_link_enc) - copy_settings_data->hpo_link_enc_inst = pipe_ctx->link_res.hpo_dp_link_enc->inst; - else - copy_settings_data->hpo_link_enc_inst = 0; - } - if (pipe_ctx->plane_res.dpp) copy_settings_data->dpp_inst = pipe_ctx->plane_res.dpp->inst; else @@ -225,6 +190,18 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; + copy_settings_data->flags.bitfields.alpm_mode = (enum dmub_alpm_mode)link->replay_settings.config.alpm_mode; + if (link->replay_settings.config.alpm_mode == DC_ALPM_AUXLESS) { + copy_settings_data->auxless_alpm_data.lfps_setup_ns = dc->dc->debug.auxless_alpm_lfps_setup_ns; + copy_settings_data->auxless_alpm_data.lfps_period_ns = dc->dc->debug.auxless_alpm_lfps_period_ns; + copy_settings_data->auxless_alpm_data.lfps_silence_ns = dc->dc->debug.auxless_alpm_lfps_silence_ns; + 
copy_settings_data->auxless_alpm_data.lfps_t1_t2_override_us = + dc->dc->debug.auxless_alpm_lfps_t1t2_us; + copy_settings_data->auxless_alpm_data.lfps_t1_t2_offset_us = + dc->dc->debug.auxless_alpm_lfps_t1t2_offset_us; + copy_settings_data->auxless_alpm_data.lttpr_count = link->dc->link_srv->dp_get_lttpr_count(link); + } + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; @@ -247,7 +224,6 @@ static void dmub_replay_set_coasting_vtotal(struct dmub_replay *dmub, pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_COASTING_VTOTAL; pCmd->header.payload_bytes = sizeof(struct dmub_cmd_replay_set_coasting_vtotal_data); - pCmd->replay_set_coasting_vtotal_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index ccbe385e132c..e6346c0ffc0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -19,7 +19,7 @@ struct dmub_replay_funcs { void (*replay_get_state)(struct dmub_replay *dmub, enum replay_state *state, uint8_t panel_inst); void (*replay_enable)(struct dmub_replay *dmub, bool enable, bool wait, - uint8_t panel_inst, struct dc_link *link); + uint8_t panel_inst); bool (*replay_copy_settings)(struct dmub_replay *dmub, struct dc_link *link, struct replay_context *replay_context, uint8_t panel_inst); void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt, diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c index e0558a78b11c..1c1228116487 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c @@ -812,7 +812,7 @@ bool dcn10_link_encoder_validate_output_with_stream( enc10, &stream->timing); break; case SIGNAL_TYPE_EDP: - is_valid = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? true : false; + is_valid = stream->timing.pixel_encoding == PIXEL_ENCODING_RGB; break; case SIGNAL_TYPE_VIRTUAL: is_valid = true; diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 6ab2a218b769..6f30b6cc3c76 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -397,7 +397,7 @@ static bool enc35_is_fifo_enabled(struct stream_encoder *enc) uint32_t reset_val; REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val); - return (reset_val == 0) ? false : true; + return reset_val != 0; } void enc35_disable_fifo(struct stream_encoder *enc) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 390c1a77fda6..9c58ff1069d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -646,7 +646,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, // the dpte_group_bytes is reduced for the specific case of vertical // access of a tile surface that has dpte request of 8x1 ptes. 
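/*
 * Editor's note (illustrative sketch, not part of the patch): the
 * display_rq_dlg_calc hunks that follow replace bitwise '&' with logical
 * '&&' in the surf_linear/surf_vert checks. With 0/1 bool operands both
 * forms produce the same truth value, but '&' does not short-circuit and
 * silently changes meaning once an operand is a plain integer flag. A
 * minimal standalone example of where the two diverge:
 */
#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	bool surf_linear = false, surf_vert = true;
	unsigned int log2_dpte_req_height_ptes = 0;
	int width_flag = 2;	/* non-0/1 "truthy" integer flag */

	/* bool operands: bitwise and logical forms agree (prints "1 1") */
	printf("%d %d\n",
	       !surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert,
	       !surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert);

	/* integer operand: 1 & 2 == 0, while 1 && 2 == 1 */
	printf("%d %d\n", 1 & width_flag, 1 && width_flag);
	return 0;
}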
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else //full size diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 843d6004258c..570e6e39eb45 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -646,7 +646,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, // the dpte_group_bytes is reduced for the specific case of vertical // access of a tile surface that has dpte request of 8x1 ptes. - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else //full size diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 5718000627b0..f549da082c01 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -652,7 +652,7 @@ static void get_meta_and_pte_attr( if (hostvm_enable) rq_sizing_param->dpte_group_bytes = 512; else { - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else //full size diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index 8d4873f80df0..4fb37df54d59 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -620,7 +620,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, if (hostvm_enable) rq_sizing_param->dpte_group_bytes = 512; else { - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else rq_sizing_param->dpte_group_bytes = 2048; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 0c0b2d67c9cd..1aaa77265eed 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -326,7 +326,7 @@ void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl; - int j; + int j = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0; dc_assert_fp_enabled(); @@ -338,6 +338,15 @@ void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p dcn3_01_soc.num_chans 
= bw_params->num_channels; ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ for (closest_clk_lvl = 0, j = dcn3_01_soc.num_states - 1; j >= 0; j--) { @@ -353,8 +362,13 @@ void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - s[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - s[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + /* Clocks independent of voltage level. */ + s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + s[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; s[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz; @@ -435,12 +449,12 @@ void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc, &context->bw_ctx.dml, pipes, pipe_cnt); /* WM Set C */ table_entry = &bw_params->wm_table.entries[WM_C]; - vlevel = min(max(vlevel_req, 2), vlevel_max); + vlevel = clamp(vlevel_req, 2, vlevel_max); calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c, &context->bw_ctx.dml, pipes, pipe_cnt); /* WM Set B */ table_entry = &bw_params->wm_table.entries[WM_B]; - vlevel = min(max(vlevel_req, 1), vlevel_max); + vlevel = clamp(vlevel_req, 1, vlevel_max); calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b, &context->bw_ctx.dml, pipes, pipe_cnt); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c index 8da97a96b1ce..8d7c59ec701d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c @@ -280,7 +280,7 @@ void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p j = 0; /* create the final dcfclk and uclk table */ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c index e968870a4b81..b5d3fd4c3694 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c @@ -285,7 +285,7 @@ void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p j = 0; /* create the final dcfclk and uclk table */ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] 
= dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index c46bda2141ac..bfeb01477f0c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -615,7 +615,7 @@ static void get_meta_and_pte_attr( if (hostvm_enable) rq_sizing_param->dpte_group_bytes = 512; else { - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else rq_sizing_param->dpte_group_bytes = 2048; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c index b7d2a0caec11..04df263ff65e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c @@ -703,7 +703,7 @@ static void get_meta_and_pte_attr( if (hostvm_enable) rq_sizing_param->dpte_group_bytes = 512; else { - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else rq_sizing_param->dpte_group_bytes = 2048; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 6160952245b4..18388fb00be8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -3229,7 +3229,7 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa j = 0; // create the final dcfclk and uclk table while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { @@ -3401,7 +3401,7 @@ bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe) uint32_t height = subvp_active_margin_list.res[i].height; refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + (uint64_t)pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c index 9ba6cb67655f..6c75aa82327a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c @@ -139,7 +139,6 @@ void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs, if (dual_plane) { unsigned int p1_pte_row_height_linear = get_dpte_row_height_linear_c(mode_lib, 
e2e_pipe_param, num_pipes, pipe_idx); - ; if (src->sw_mode == dm_sw_linear) ASSERT(p1_pte_row_height_linear >= 8); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 8839faf42207..e0a1dc89ce43 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -779,7 +779,7 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p j = 0; // create the final dcfclk and uclk table while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index a06217a9eef6..f6879e622271 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -8,7 +8,7 @@ #include "dml2_internal_types.h" #include "dml21_utils.h" #include "dml21_translation_helper.h" -#include "bounding_boxes/dcn4_soc_bb.h" +#include "soc_and_ip_translator.h" static void dml21_populate_pmo_options(struct dml2_pmo_options *pmo_options, const struct dc *in_dc, @@ -38,375 +38,37 @@ static void dml21_populate_pmo_options(struct dml2_pmo_options *pmo_options, pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; } -/* - * Populate dml_init based on default static values in soc bb. The default - * values are for reference and support at least minimal operation of current - * SoC and DCN hardware. The values could be modifed by subsequent override - * functions to reflect our true hardware capability. - */ -static void populate_default_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) +static enum dml2_project_id dml21_dcn_revision_to_dml2_project_id(enum dce_version dcn_version) { - switch (in_dc->ctx->dce_version) { + enum dml2_project_id project_id; + switch (dcn_version) { case DCN_VERSION_4_01: - dml_init->options.project_id = dml2_project_dcn4x_stage2_auto_drr_svp; - dml21_populate_pmo_options(&dml_init->options.pmo_options, in_dc, config); - dml_init->soc_bb = dml2_socbb_dcn401; - dml_init->soc_bb.qos_parameters = dml_dcn4_variant_a_soc_qos_params; - dml_init->ip_caps = dml2_dcn401_max_ip_caps; + project_id = dml2_project_dcn4x_stage2_auto_drr_svp; break; default: - memset(dml_init, 0, sizeof(*dml_init)); + project_id = dml2_project_invalid; DC_ERR("unsupported dcn version for DML21!"); - return; - } -} - -static void override_dml_init_with_values_from_hardware_default(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) -{ - dml_init->soc_bb.dchub_refclk_mhz = in_dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; - dml_init->soc_bb.dprefclk_mhz = in_dc->clk_mgr->dprefclk_khz / 1000; - dml_init->soc_bb.dispclk_dppclk_vco_speed_mhz = in_dc->clk_mgr->dentist_vco_freq_khz / 1000.0; -} - -/* - * SMU stands for System Management Unit. It is a power management processor. 
- * It owns the initialization of dc's clock table and programming of clock values - * based on dc's requests. - * Our clock values in base soc bb is a dummy placeholder. The real clock values - * are retrieved from SMU firmware to dc clock table at runtime. - * This function overrides our dummy placeholder values with real values in dc - * clock table. - */ -static void override_dml_init_with_values_from_smu( - struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) -{ - int i; - const struct clk_bw_params *dc_bw_params = in_dc->clk_mgr->bw_params; - const struct clk_limit_table *dc_clk_table = &dc_bw_params->clk_table; - struct dml2_soc_state_table *dml_clk_table = &dml_init->soc_bb.clk_table; - - if (!in_dc->clk_mgr->funcs->is_smu_present || - !in_dc->clk_mgr->funcs->is_smu_present(in_dc->clk_mgr)) - /* skip if smu is not present */ - return; - - /* dcfclk */ - if (dc_clk_table->num_entries_per_clk.num_dcfclk_levels) { - dml_clk_table->dcfclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dcfclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->dcfclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dcfclk_mhz && - dc_clk_table->entries[i].dcfclk_mhz > dc_bw_params->dc_mode_limit.dcfclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].dcfclk_mhz < dc_bw_params->dc_mode_limit.dcfclk_mhz) { - dml_clk_table->dcfclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dcfclk_mhz * 1000; - dml_clk_table->dcfclk.num_clk_values = i + 1; - } else { - dml_clk_table->dcfclk.clk_values_khz[i] = 0; - dml_clk_table->dcfclk.num_clk_values = i; - } - } else { - dml_clk_table->dcfclk.clk_values_khz[i] = dc_clk_table->entries[i].dcfclk_mhz * 1000; - } - } else { - dml_clk_table->dcfclk.clk_values_khz[i] = 0; - } - } - } - - /* fclk */ - if (dc_clk_table->num_entries_per_clk.num_fclk_levels) { - dml_clk_table->fclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_fclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->fclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.fclk_mhz && - dc_clk_table->entries[i].fclk_mhz > dc_bw_params->dc_mode_limit.fclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].fclk_mhz < dc_bw_params->dc_mode_limit.fclk_mhz) { - dml_clk_table->fclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.fclk_mhz * 1000; - dml_clk_table->fclk.num_clk_values = i + 1; - } else { - dml_clk_table->fclk.clk_values_khz[i] = 0; - dml_clk_table->fclk.num_clk_values = i; - } - } else { - dml_clk_table->fclk.clk_values_khz[i] = dc_clk_table->entries[i].fclk_mhz * 1000; - } - } else { - dml_clk_table->fclk.clk_values_khz[i] = 0; - } - } - } - - /* uclk */ - if (dc_clk_table->num_entries_per_clk.num_memclk_levels) { - dml_clk_table->uclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_memclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->uclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.memclk_mhz && - dc_clk_table->entries[i].memclk_mhz > dc_bw_params->dc_mode_limit.memclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].memclk_mhz < dc_bw_params->dc_mode_limit.memclk_mhz) { - dml_clk_table->uclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.memclk_mhz * 1000; - dml_clk_table->uclk.num_clk_values = i + 1; - } else { - 
dml_clk_table->uclk.clk_values_khz[i] = 0; - dml_clk_table->uclk.num_clk_values = i; - } - } else { - dml_clk_table->uclk.clk_values_khz[i] = dc_clk_table->entries[i].memclk_mhz * 1000; - } - } else { - dml_clk_table->uclk.clk_values_khz[i] = 0; - } - } - } - - /* dispclk */ - if (dc_clk_table->num_entries_per_clk.num_dispclk_levels) { - dml_clk_table->dispclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dispclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->dispclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dispclk_mhz && - dc_clk_table->entries[i].dispclk_mhz > dc_bw_params->dc_mode_limit.dispclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].dispclk_mhz < dc_bw_params->dc_mode_limit.dispclk_mhz) { - dml_clk_table->dispclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dispclk_mhz * 1000; - dml_clk_table->dispclk.num_clk_values = i + 1; - } else { - dml_clk_table->dispclk.clk_values_khz[i] = 0; - dml_clk_table->dispclk.num_clk_values = i; - } - } else { - dml_clk_table->dispclk.clk_values_khz[i] = dc_clk_table->entries[i].dispclk_mhz * 1000; - } - } else { - dml_clk_table->dispclk.clk_values_khz[i] = 0; - } - } - } - - /* dppclk */ - if (dc_clk_table->num_entries_per_clk.num_dppclk_levels) { - dml_clk_table->dppclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dppclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->dppclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dppclk_mhz && - dc_clk_table->entries[i].dppclk_mhz > dc_bw_params->dc_mode_limit.dppclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].dppclk_mhz < dc_bw_params->dc_mode_limit.dppclk_mhz) { - dml_clk_table->dppclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dppclk_mhz * 1000; - dml_clk_table->dppclk.num_clk_values = i + 1; - } else { - dml_clk_table->dppclk.clk_values_khz[i] = 0; - dml_clk_table->dppclk.num_clk_values = i; - } - } else { - dml_clk_table->dppclk.clk_values_khz[i] = dc_clk_table->entries[i].dppclk_mhz * 1000; - } - } else { - dml_clk_table->dppclk.clk_values_khz[i] = 0; - } - } - } - - /* dtbclk */ - if (dc_clk_table->num_entries_per_clk.num_dtbclk_levels) { - dml_clk_table->dtbclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dtbclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->dtbclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dtbclk_mhz && - dc_clk_table->entries[i].dtbclk_mhz > dc_bw_params->dc_mode_limit.dtbclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].dtbclk_mhz < dc_bw_params->dc_mode_limit.dtbclk_mhz) { - dml_clk_table->dtbclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dtbclk_mhz * 1000; - dml_clk_table->dtbclk.num_clk_values = i + 1; - } else { - dml_clk_table->dtbclk.clk_values_khz[i] = 0; - dml_clk_table->dtbclk.num_clk_values = i; - } - } else { - dml_clk_table->dtbclk.clk_values_khz[i] = dc_clk_table->entries[i].dtbclk_mhz * 1000; - } - } else { - dml_clk_table->dtbclk.clk_values_khz[i] = 0; - } - } - } - - /* socclk */ - if (dc_clk_table->num_entries_per_clk.num_socclk_levels) { - dml_clk_table->socclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_socclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->socclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.socclk_mhz 
&& - dc_clk_table->entries[i].socclk_mhz > dc_bw_params->dc_mode_limit.socclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].socclk_mhz < dc_bw_params->dc_mode_limit.socclk_mhz) { - dml_clk_table->socclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.socclk_mhz * 1000; - dml_clk_table->socclk.num_clk_values = i + 1; - } else { - dml_clk_table->socclk.clk_values_khz[i] = 0; - dml_clk_table->socclk.num_clk_values = i; - } - } else { - dml_clk_table->socclk.clk_values_khz[i] = dc_clk_table->entries[i].socclk_mhz * 1000; - } - } else { - dml_clk_table->socclk.clk_values_khz[i] = 0; - } - } - } -} - -static void override_dml_init_with_values_from_vbios( - struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) -{ - const struct clk_bw_params *dc_bw_params = in_dc->clk_mgr->bw_params; - struct dml2_soc_bb *dml_soc_bb = &dml_init->soc_bb; - struct dml2_soc_state_table *dml_clk_table = &dml_init->soc_bb.clk_table; - - if (in_dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns > 0) - dml_soc_bb->power_management_parameters.dram_clk_change_blackout_us = - (in_dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns + 9) / 10; - - if (in_dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns > 0) - dml_soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = - (in_dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns + 9) / 10; - - if (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns > 0) - dml_soc_bb->power_management_parameters.stutter_exit_latency_us = - (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns + 9) / 10; - - if (dc_bw_params->num_channels) { - dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels; - dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; - } else if (in_dc->ctx->dc_bios->vram_info.num_chans) { - dml_clk_table->dram_config.channel_count = in_dc->ctx->dc_bios->vram_info.num_chans; - dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; - } - - if (dc_bw_params->dram_channel_width_bytes) { - dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes; - } else if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { - dml_clk_table->dram_config.channel_width_bytes = in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + break; } - dml_init->soc_bb.xtalclk_mhz = in_dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000; + return project_id; } - -static void override_dml_init_with_values_from_dmub(struct dml2_initialize_instance_in_out *dml_init, +void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc) { - /* - * TODO - There seems to be overlaps between the values overriden from - * dmub and vbios. Investigate and identify the values that DMUB needs - * to own. 
- */ -// const struct dmub_soc_bb_params *dmub_bb_params = -// (const struct dmub_soc_bb_params *)config->bb_from_dmub; - -// if (dmub_bb_params == NULL) -// return; - -// if (dmub_bb_params->dram_clk_change_blackout_ns > 0) -// dml_init->soc_bb.power_management_parameters.dram_clk_change_blackout_us = -// (double) dmub_bb_params->dram_clk_change_blackout_ns / 1000.0; -// if (dmub_bb_params->dram_clk_change_read_only_ns > 0) -// dml_init->soc_bb.power_management_parameters.dram_clk_change_read_only_us = -// (double) dmub_bb_params->dram_clk_change_read_only_ns / 1000.0; -// if (dmub_bb_params->dram_clk_change_write_only_ns > 0) -// dml_init->soc_bb.power_management_parameters.dram_clk_change_write_only_us = -// (double) dmub_bb_params->dram_clk_change_write_only_ns / 1000.0; -// if (dmub_bb_params->fclk_change_blackout_ns > 0) -// dml_init->soc_bb.power_management_parameters.fclk_change_blackout_us = -// (double) dmub_bb_params->fclk_change_blackout_ns / 1000.0; -// if (dmub_bb_params->g7_ppt_blackout_ns > 0) -// dml_init->soc_bb.power_management_parameters.g7_ppt_blackout_us = -// (double) dmub_bb_params->g7_ppt_blackout_ns / 1000.0; -// if (dmub_bb_params->stutter_enter_plus_exit_latency_ns > 0) -// dml_init->soc_bb.power_management_parameters.stutter_enter_plus_exit_latency_us = -// (double) dmub_bb_params->stutter_enter_plus_exit_latency_ns / 1000.0; -// if (dmub_bb_params->stutter_exit_latency_ns > 0) -// dml_init->soc_bb.power_management_parameters.stutter_exit_latency_us = -// (double) dmub_bb_params->stutter_exit_latency_ns / 1000.0; -// if (dmub_bb_params->z8_stutter_enter_plus_exit_latency_ns > 0) -// dml_init->soc_bb.power_management_parameters.z8_stutter_enter_plus_exit_latency_us = -// (double) dmub_bb_params->z8_stutter_enter_plus_exit_latency_ns / 1000.0; -// if (dmub_bb_params->z8_stutter_exit_latency_ns > 0) -// dml_init->soc_bb.power_management_parameters.z8_stutter_exit_latency_us = -// (double) dmub_bb_params->z8_stutter_exit_latency_ns / 1000.0; -// if (dmub_bb_params->z8_min_idle_time_ns > 0) -// dml_init->soc_bb.power_management_parameters.z8_min_idle_time = -// (double) dmub_bb_params->z8_min_idle_time_ns / 1000.0; -// #ifndef TRIM_DML2_DCN6B_IP_SENSITIVE -// if (dmub_bb_params->type_b_dram_clk_change_blackout_ns > 0) -// dml_init->soc_bb.power_management_parameters.lpddr5_dram_clk_change_blackout_us = -// (double) dmub_bb_params->type_b_dram_clk_change_blackout_ns / 1000.0; -// if (dmub_bb_params->type_b_ppt_blackout_ns > 0) -// dml_init->soc_bb.power_management_parameters.lpddr5_ppt_blackout_us = -// (double) dmub_bb_params->type_b_ppt_blackout_ns / 1000.0; -// #else -// if (dmub_bb_params->type_b_dram_clk_change_blackout_ns > 0) -// dml_init->soc_bb.power_management_parameters.type_b_dram_clk_change_blackout_us = -// (double) dmub_bb_params->type_b_dram_clk_change_blackout_ns / 1000.0; -// if (dmub_bb_params->type_b_ppt_blackout_ns > 0) -// dml_init->soc_bb.power_management_parameters.type_b_ppt_blackout_us = -// (double) dmub_bb_params->type_b_ppt_blackout_ns / 1000.0; -// #endif -// if (dmub_bb_params->vmin_limit_dispclk_khz > 0) -// dml_init->soc_bb.vmin_limit.dispclk_khz = dmub_bb_params->vmin_limit_dispclk_khz; -// if (dmub_bb_params->vmin_limit_dcfclk_khz > 0) -// dml_init->soc_bb.vmin_limit.dcfclk_khz = dmub_bb_params->vmin_limit_dcfclk_khz; -// if (dmub_bb_params->g7_temperature_read_blackout_ns > 0) -// dml_init->soc_bb.power_management_parameters.g7_temperature_read_blackout_us = -// (double) dmub_bb_params->g7_temperature_read_blackout_ns / 1000.0; -} 
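/*
 * Editor's note (hedged sketch, not from the patch): the removed
 * override_dml_init_with_values_from_smu() above repeated the same
 * "copy DPM levels, clamp at the DC-mode limit, convert MHz to kHz"
 * block once per clock type. The pattern it implemented, expressed as
 * one generic helper with simplified stand-in types, looks roughly
 * like this (names and the table-size bound are assumptions for the
 * sketch only; the patch itself delegates this to soc_and_ip_translator):
 */
#include <string.h>

#define SKETCH_MAX_CLK_TABLE_SIZE 8	/* assumed bound for the sketch */

struct sketch_clk_list {
	unsigned int num_clk_values;
	unsigned int clk_values_khz[SKETCH_MAX_CLK_TABLE_SIZE];
};

/* dc_levels_mhz: ascending DPM levels from the SMU-backed clock table.
 * limit_mhz: optional DC-mode limit (0 = no limit). */
static void sketch_fill_clk_table(struct sketch_clk_list *out,
				  const unsigned int *dc_levels_mhz,
				  unsigned int num_levels,
				  unsigned int limit_mhz)
{
	unsigned int i;

	memset(out, 0, sizeof(*out));
	for (i = 0; i < num_levels && i < SKETCH_MAX_CLK_TABLE_SIZE; i++) {
		if (limit_mhz && dc_levels_mhz[i] > limit_mhz) {
			/* first level above the limit: clamp it and stop,
			 * otherwise truncate the table just below it */
			if (i == 0 || dc_levels_mhz[i - 1] < limit_mhz) {
				out->clk_values_khz[i] = limit_mhz * 1000;
				out->num_clk_values = i + 1;
			} else {
				out->num_clk_values = i;
			}
			return;
		}
		out->clk_values_khz[i] = dc_levels_mhz[i] * 1000;
		out->num_clk_values = i + 1;
	}
}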
+ dml_init->options.project_id = dml21_dcn_revision_to_dml2_project_id(in_dc->ctx->dce_version); -static void override_dml_init_with_values_from_software_policy(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) -{ - if (!config->use_native_soc_bb_construction) { + if (config->use_native_soc_bb_construction) { + in_dc->soc_and_ip_translator->translator_funcs->get_soc_bb(&dml_init->soc_bb, in_dc, config); + in_dc->soc_and_ip_translator->translator_funcs->get_ip_caps(&dml_init->ip_caps); + } else { dml_init->soc_bb = config->external_socbb_ip_params->soc_bb; dml_init->ip_caps = config->external_socbb_ip_params->ip_params; } - if (in_dc->bb_overrides.sr_exit_time_ns) - dml_init->soc_bb.power_management_parameters.stutter_exit_latency_us = - in_dc->bb_overrides.sr_exit_time_ns / 1000.0; - - if (in_dc->bb_overrides.sr_enter_plus_exit_time_ns) - dml_init->soc_bb.power_management_parameters.stutter_enter_plus_exit_latency_us = - in_dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - - if (in_dc->bb_overrides.dram_clock_change_latency_ns) - dml_init->soc_bb.power_management_parameters.dram_clk_change_blackout_us = - in_dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - - if (in_dc->bb_overrides.fclk_clock_change_latency_ns) - dml_init->soc_bb.power_management_parameters.fclk_change_blackout_us = - in_dc->bb_overrides.fclk_clock_change_latency_ns / 1000.0; -} - -void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) -{ - populate_default_dml_init_params(dml_init, config, in_dc); - - override_dml_init_with_values_from_hardware_default(dml_init, config, in_dc); - - override_dml_init_with_values_from_smu(dml_init, config, in_dc); - - override_dml_init_with_values_from_vbios(dml_init, config, in_dc); - - override_dml_init_with_values_from_dmub(dml_init, config, in_dc); - - override_dml_init_with_values_from_software_policy(dml_init, config, in_dc); + dml21_populate_pmo_options(&dml_init->options.pmo_options, in_dc, config); } static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream) @@ -463,11 +125,7 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream))); } - if (stream->timing.min_refresh_in_uhz > min_hardware_refresh_in_uhz) { - timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz; - } else { - timing->drr_config.min_refresh_uhz = min_hardware_refresh_in_uhz; - } + timing->drr_config.min_refresh_uhz = max(stream->timing.min_refresh_in_uhz, min_hardware_refresh_in_uhz); if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase && stream->ctx->dc->config.enable_fpo_flicker_detection == 1) @@ -1165,6 +823,8 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz; context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz; context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz; + context->bw_ctx.bw.dcn.clk.stutter_efficiency.base_efficiency = in_ctx->v21.mode_programming.programming->stutter.base_percent_efficiency; + 
context->bw_ctx.bw.dcn.clk.stutter_efficiency.low_power_efficiency = in_ctx->v21.mode_programming.programming->stutter.low_power_percent_efficiency; } static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 03de3cf06ae5..798abb2b2e67 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -60,7 +60,7 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context *dml_ctx, con DC_FP_START(); - dml21_populate_dml_init_params(&dml_ctx->v21.dml_init, config, in_dc); + dml21_populate_dml_init_params(&dml_ctx->v21.dml_init, &dml_ctx->config, in_dc); dml2_initialize_instance(&dml_ctx->v21.dml_init); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index b05030926ce8..91955bbe24b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -159,6 +159,8 @@ struct dml2_dchub_watermark_regs { uint32_t sr_exit; uint32_t sr_enter_z8; uint32_t sr_exit_z8; + uint32_t sr_enter_low_power; + uint32_t sr_exit_low_power; uint32_t uclk_pstate; uint32_t fclk_pstate; uint32_t temp_read_or_ppt; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 8c9f414aa6bf..176f55947664 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -96,6 +96,8 @@ struct dml2_soc_power_management_parameters { double g7_temperature_read_blackout_us; double stutter_enter_plus_exit_latency_us; double stutter_exit_latency_us; + double low_power_stutter_enter_plus_exit_latency_us; + double low_power_stutter_exit_latency_us; double z8_stutter_enter_plus_exit_latency_us; double z8_stutter_exit_latency_us; double z8_min_idle_time; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 98c0234e2f47..7de10a95cfdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -417,6 +417,8 @@ struct dml2_display_cfg_programming { struct { bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition + uint8_t base_percent_efficiency; //LP1 + uint8_t low_power_percent_efficiency; //LP2 } stutter; struct { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index b9cff2198511..bf62d42b3f78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -1238,18 +1238,27 @@ static void CalculateDETBufferSize( static double CalculateRequiredDispclk( enum dml2_odm_mode ODMMode, - double PixelClock) + double PixelClock, + bool isTMDS420) { + double DispClk; if (ODMMode == dml2_odm_mode_combine_4to1) { - return 
PixelClock / 4.0; + DispClk = PixelClock / 4.0; } else if (ODMMode == dml2_odm_mode_combine_3to1) { - return PixelClock / 3.0; + DispClk = PixelClock / 3.0; } else if (ODMMode == dml2_odm_mode_combine_2to1) { - return PixelClock / 2.0; + DispClk = PixelClock / 2.0; } else { - return PixelClock; + DispClk = PixelClock; + } + + if (isTMDS420) { + double TMDS420MinPixClock = PixelClock / 2.0; + DispClk = math_max2(DispClk, TMDS420MinPixClock); } + + return DispClk; } static double TruncToValidBPP( @@ -4122,11 +4131,12 @@ static noinline_for_stack void CalculateODMMode( bool success; bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0); enum dml2_odm_mode DecidedODMMode; + bool isTMDS420 = (OutFormat == dml2_420 && Output == dml2_hdmi); - SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock); - SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock); - SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock); - SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock); + SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, isTMDS420); #ifdef __DML_VBA_DEBUG__ DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index 28394de02885..640087e862f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -10,7 +10,7 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance { bool result = false; - if (out == 0) + if (!out) return false; memset(out, 0, sizeof(struct dml2_core_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 28687565ac22..ffb8c09f37a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -201,6 +201,8 @@ struct dml2_core_internal_watermarks { double WritebackFCLKChangeWatermark; double StutterExitWatermark; double StutterEnterPlusExitWatermark; + double LowPowerStutterExitWatermark; + double LowPowerStutterEnterPlusExitWatermark; double Z8StutterExitWatermark; double Z8StutterEnterPlusExitWatermark; double USRRetrainingWatermark; @@ -877,6 +879,9 @@ struct dml2_core_internal_mode_program { double Z8StutterEfficiency; unsigned int Z8NumberOfStutterBurstsPerFrame; double Z8StutterEfficiencyNotIncludingVBlank; + double LowPowerStutterEfficiency; + double LowPowerStutterEfficiencyNotIncludingVBlank; + unsigned int LowPowerNumberOfStutterBurstsPerFrame; double StutterPeriod; double Z8StutterEfficiencyBestCase; 
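/*
 * Editor's note (illustrative sketch of the CalculateRequiredDispclk()
 * change shown above, using stand-in types): required DISPCLK is the
 * pixel clock divided by the ODM combine factor, and the new isTMDS420
 * path floors the result at PixelClock / 2 so HDMI 4:2:0 output never
 * requests less than half the pixel rate.
 */
#include <stdbool.h>

enum sketch_odm_mode {		/* divisor encoded directly for the sketch */
	SKETCH_ODM_BYPASS = 1,
	SKETCH_ODM_COMBINE_2TO1 = 2,
	SKETCH_ODM_COMBINE_3TO1 = 3,
	SKETCH_ODM_COMBINE_4TO1 = 4,
};

static double sketch_required_dispclk(enum sketch_odm_mode odm,
				      double pixel_clock_mhz,
				      bool is_tmds_420)
{
	double dispclk = pixel_clock_mhz / (double)odm;

	/* HDMI 4:2:0 over TMDS still needs at least half the pixel rate */
	if (is_tmds_420 && dispclk < pixel_clock_mhz / 2.0)
		dispclk = pixel_clock_mhz / 2.0;

	return dispclk;
}

/* e.g. 594 MHz 4:2:0 with 4-to-1 ODM: max(148.5, 297.0) = 297.0 MHz */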
unsigned int Z8NumberOfStutterBurstsPerFrameBestCase; @@ -1016,6 +1021,8 @@ struct dml2_core_internal_SOCParametersList { double FCLKChangeLatency; double SRExitTime; double SREnterPlusExitTime; + double SRExitTimeLowPower; + double SREnterPlusExitTimeLowPower; double SRExitZ8Time; double SREnterPlusExitZ8Time; double USRRetrainingLatency; @@ -1851,9 +1858,11 @@ struct dml2_core_calcs_CalculateStutterEfficiency_params { unsigned int CompbufReservedSpaceZs; bool hw_debug5; double SRExitTime; + double SRExitTimeLowPower; double SRExitZ8Time; bool SynchronizeTimings; double StutterEnterPlusExitWatermark; + double LowPowerStutterEnterPlusExitWatermark; double Z8StutterEnterPlusExitWatermark; bool ProgressiveToInterlaceUnitInOPP; double *MinTTUVBlank; @@ -1879,7 +1888,10 @@ struct dml2_core_calcs_CalculateStutterEfficiency_params { // output double *StutterEfficiencyNotIncludingVBlank; double *StutterEfficiency; + double *LowPowerStutterEfficiencyNotIncludingVBlank; + double *LowPowerStutterEfficiency; unsigned int *NumberOfStutterBurstsPerFrame; + unsigned int *LowPowerNumberOfStutterBurstsPerFrame; double *Z8StutterEfficiencyNotIncludingVBlank; double *Z8StutterEfficiency; unsigned int *Z8NumberOfStutterBurstsPerFrame; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index 3861bc6c9621..dfd01440737d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -20,7 +20,7 @@ bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance { bool result = false; - if (out == 0) + if (!out) return false; memset(out, 0, sizeof(struct dml2_dpmm_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c index cd3fbc0591d8..c60b8fe90819 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -15,7 +15,7 @@ bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance * { bool result = false; - if (out == 0) + if (!out) return false; memset(out, 0, sizeof(struct dml2_mcg_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index 7ed0242a4b33..55d2464365d0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -26,7 +26,7 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * { bool result = false; - if (out == 0) + if (!out) return false; memset(out, 0, sizeof(struct dml2_pmo_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 5f1b49a50049..4cfe64aa8492 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -473,7 +473,6 @@ static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes) { bool sorted, swapped; unsigned int cur_index; - unsigned int temp; int odm_slice_index; for (odm_slice_index = 0; odm_slice_index < 
pipes->num_pipes_assigned_to_plane_for_odm_combine; odm_slice_index++) { @@ -489,9 +488,8 @@ static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes) swapped = false; while (!sorted) { if (pipes->pipes_assigned_to_plane[odm_slice_index][cur_index] > pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1]) { - temp = pipes->pipes_assigned_to_plane[odm_slice_index][cur_index]; - pipes->pipes_assigned_to_plane[odm_slice_index][cur_index] = pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1]; - pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1] = temp; + swap(pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1], + pipes->pipes_assigned_to_plane[odm_slice_index][cur_index]); swapped = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c index a56e75cdf712..c59f825cfae9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c @@ -654,14 +654,14 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state unsigned int svp_height, unsigned int svp_vstartup) { - unsigned int i, pipe_idx; + unsigned int i; double line_time, fp_and_sync_width_time; struct pipe_ctx *pipe; uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; static const double cvt_rb_vblank_max = ((double) 460 / (1000 * 1000)); // Find DML pipe index (pipe_idx) using dc_pipe_idx - for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { pipe = &state->res_ctx.pipe_ctx[i]; if (!pipe->stream) @@ -669,8 +669,6 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state if (i == dc_pipe_idx) break; - - pipe_idx++; } // Calculate lines required for pstate allow width and FW processing delays diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 0318260370ed..9deb03a18ccc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -535,7 +535,7 @@ static bool dml2_validate_only(struct dc_state *context, enum dc_validate_mode v if (result) result = does_configuration_meet_sw_policies(dml2, &dml2->v20.scratch.cur_display_config, &dml2->v20.scratch.mode_support_info); - return (result == 1) ? 
true : false; + return result == 1; } static void dml2_apply_debug_options(const struct dc *dc, struct dml2_context *dml2) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c index 75fb77bca83b..01480a04f85e 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c @@ -520,6 +520,15 @@ void dpp1_dppclk_control( REG_UPDATE(DPP_CONTROL, DPP_CLOCK_ENABLE, 0); } +void dpp_force_disable_cursor(struct dpp *dpp_base) +{ + struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); + + /* Force disable cursor */ + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, 0); + dpp_base->pos.cur0_ctl.bits.cur0_enable = 0; +} + static const struct dpp_funcs dcn10_dpp_funcs = { .dpp_read_state = dpp_read_state, .dpp_reset = dpp_reset, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h index c48139bed11f..f466182963f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h @@ -1525,4 +1525,6 @@ void dpp1_construct(struct dcn10_dpp *dpp1, void dpp1_cm_get_gamut_remap(struct dpp *dpp_base, struct dpp_grph_csc_adjustment *adjust); +void dpp_force_disable_cursor(struct dpp *dpp_base); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index 2d70586cef40..09be2a90cc79 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -1494,6 +1494,7 @@ static struct dpp_funcs dcn30_dpp_funcs = { .dpp_dppclk_control = dpp1_dppclk_control, .dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier, .dpp_get_gamut_remap = dpp3_cm_get_gamut_remap, + .dpp_force_disable_cursor = dpp_force_disable_cursor, }; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h index 5a6a861402b3..5f6b431ec398 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h @@ -673,6 +673,16 @@ struct dcn401_dpp { struct pwl_params pwl_data; }; +enum dcn401_dscl_mode_sel { + DCN401_DSCL_MODE_SCALING_444_BYPASS = 0, + DCN401_DSCL_MODE_SCALING_444_RGB_ENABLE = 1, + DCN401_DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2, + DCN401_DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3, + DCN401_DSCL_MODE_SCALING_420_LUMA_BYPASS = 4, + DCN401_DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5, + DCN401_DSCL_MODE_DSCL_BYPASS = 6 +}; + bool dpp401_construct(struct dcn401_dpp *dpp401, struct dc_context *ctx, uint32_t inst, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 2f92e7d4981b..6df3419f825f 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -78,16 +78,6 @@ enum dscl_autocal_mode { AUTOCAL_MODE_AUTOREPLICATE = 3 }; -enum dscl_mode_sel { - DSCL_MODE_SCALING_444_BYPASS = 0, - DSCL_MODE_SCALING_444_RGB_ENABLE = 1, - DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2, - DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3, - DSCL_MODE_SCALING_420_LUMA_BYPASS = 4, - DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5, - DSCL_MODE_DSCL_BYPASS = 6 -}; - static int dpp401_dscl_get_pixel_depth_val(enum lb_pixel_depth depth) { if (depth == LB_PIXEL_DEPTH_30BPP) @@ -122,7 +112,7 @@ static bool dpp401_dscl_is_420_format(enum 
pixel_format format) return false; } -static enum dscl_mode_sel dpp401_dscl_get_dscl_mode( +static enum dcn401_dscl_mode_sel dpp401_dscl_get_dscl_mode( struct dpp *dpp_base, const struct scaler_data *data, bool dbg_always_scale) @@ -132,7 +122,7 @@ static enum dscl_mode_sel dpp401_dscl_get_dscl_mode( if (dpp_base->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { /* DSCL is processing data in fixed format */ if (data->format == PIXEL_FORMAT_FP16) - return DSCL_MODE_DSCL_BYPASS; + return DCN401_DSCL_MODE_DSCL_BYPASS; } if (data->ratios.horz.value == one @@ -140,20 +130,20 @@ static enum dscl_mode_sel dpp401_dscl_get_dscl_mode( && data->ratios.horz_c.value == one && data->ratios.vert_c.value == one && !dbg_always_scale) - return DSCL_MODE_SCALING_444_BYPASS; + return DCN401_DSCL_MODE_SCALING_444_BYPASS; if (!dpp401_dscl_is_420_format(data->format)) { if (dpp401_dscl_is_video_format(data->format)) - return DSCL_MODE_SCALING_444_YCBCR_ENABLE; + return DCN401_DSCL_MODE_SCALING_444_YCBCR_ENABLE; else - return DSCL_MODE_SCALING_444_RGB_ENABLE; + return DCN401_DSCL_MODE_SCALING_444_RGB_ENABLE; } if (data->ratios.horz.value == one && data->ratios.vert.value == one) - return DSCL_MODE_SCALING_420_LUMA_BYPASS; + return DCN401_DSCL_MODE_SCALING_420_LUMA_BYPASS; if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) - return DSCL_MODE_SCALING_420_CHROMA_BYPASS; + return DCN401_DSCL_MODE_SCALING_420_CHROMA_BYPASS; - return DSCL_MODE_SCALING_420_YCBCR_ENABLE; + return DCN401_DSCL_MODE_SCALING_420_YCBCR_ENABLE; } static void dpp401_power_on_dscl( @@ -1071,7 +1061,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, uint32_t v_num_taps_c = scl_data->taps.v_taps_c - 1; uint32_t h_num_taps = scl_data->taps.h_taps - 1; uint32_t h_num_taps_c = scl_data->taps.h_taps_c - 1; - enum dscl_mode_sel dscl_mode = dpp401_dscl_get_dscl_mode( + enum dcn401_dscl_mode_sel dscl_mode = dpp401_dscl_get_dscl_mode( dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN && scl_data->format <= PIXEL_FORMAT_VIDEO_END; @@ -1102,7 +1092,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, dpp->scl_data = *scl_data; if ((dpp->base.ctx->dc->config.use_spl) && (!dpp->base.ctx->dc->debug.disable_spl)) { - dscl_mode = (enum dscl_mode_sel) scl_data->dscl_prog_data.dscl_mode; + dscl_mode = (enum dcn401_dscl_mode_sel) scl_data->dscl_prog_data.dscl_mode; rect = (struct rect *)&scl_data->dscl_prog_data.recout; mpc_width = scl_data->dscl_prog_data.mpc_size.width; mpc_height = scl_data->dscl_prog_data.mpc_size.height; @@ -1112,7 +1102,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, h_num_taps_c = scl_data->dscl_prog_data.taps.h_taps_c; } if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.dscl) { - if (dscl_mode != DSCL_MODE_DSCL_BYPASS) + if (dscl_mode != DCN401_DSCL_MODE_DSCL_BYPASS) dpp401_power_on_dscl(dpp_base, true); } @@ -1139,7 +1129,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, /* SCL mode */ REG_UPDATE(SCL_MODE, DSCL_MODE, dscl_mode); - if (dscl_mode == DSCL_MODE_DSCL_BYPASS) { + if (dscl_mode == DCN401_DSCL_MODE_DSCL_BYPASS) { if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.dscl) dpp401_power_on_dscl(dpp_base, false); return; @@ -1149,7 +1139,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, lb_config = dpp401_dscl_find_lb_memory_config(dpp, scl_data); dpp401_dscl_set_lb(dpp, &scl_data->lb_params, lb_config); - if (dscl_mode == 
DSCL_MODE_SCALING_444_BYPASS) { + if (dscl_mode == DCN401_DSCL_MODE_SCALING_444_BYPASS) { if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_disable_easf(dpp_base, scl_data); dpp401_dscl_program_isharp(dpp_base, scl_data, program_isharp_1dlut, &bs_coeffs_updated); diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 1f53a9f0c0ac..e4144b244332 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -1157,6 +1157,11 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; + /* increase miniumum slice count to meet sink slice width limitations */ + min_slices_h = dc_fixpt_ceil(dc_fixpt_max( + dc_fixpt_div_int(dc_fixpt_from_int(pic_width), dsc_common_caps.max_slice_width), // sink min + dc_fixpt_from_int(min_slices_h))); // source min + min_slices_h = fit_num_slices_up(dsc_common_caps.slice_caps, min_slices_h); /* increase minimum slice count to meet sink throughput limitations */ diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c index d347bb06577a..e7e5f6d4778e 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c @@ -440,6 +440,35 @@ void hubbub3_init_watermarks(struct hubbub *hubbub) REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, reg); } +void hubbub3_get_det_sizes(struct hubbub *hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes) +{ + struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); + + REG_GET_2(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, &curr_det_sizes[0], + DET0_SIZE, &target_det_sizes[0]); + + REG_GET_2(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, &curr_det_sizes[1], + DET1_SIZE, &target_det_sizes[1]); + + REG_GET_2(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, &curr_det_sizes[2], + DET2_SIZE, &target_det_sizes[2]); + + REG_GET_2(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, &curr_det_sizes[3], + DET3_SIZE, &target_det_sizes[3]); + +} + +uint32_t hubbub3_compbuf_config_error(struct hubbub *hubbub) +{ + struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); + uint32_t compbuf_config_error = 0; + + REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, + &compbuf_config_error); + + return compbuf_config_error; +} + static const struct hubbub_funcs hubbub30_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, @@ -457,6 +486,8 @@ static const struct hubbub_funcs hubbub30_funcs = { .force_pstate_change_control = hubbub3_force_pstate_change_control, .init_watermarks = hubbub3_init_watermarks, .hubbub_read_state = hubbub2_read_state, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub3_construct(struct dcn20_hubbub *hubbub3, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h index ca6233e8f1f4..49a469969d36 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h @@ -133,4 +133,10 @@ void hubbub3_force_pstate_change_control(struct hubbub *hubbub, void hubbub3_init_watermarks(struct hubbub *hubbub); +void hubbub3_get_det_sizes(struct hubbub *hubbub, + uint32_t *curr_det_sizes, + uint32_t *target_det_sizes); + +uint32_t hubbub3_compbuf_config_error(struct hubbub *hubbub); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c 
b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c index b98505b240a7..cdb20251a154 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c @@ -1071,6 +1071,8 @@ static const struct hubbub_funcs hubbub31_funcs = { .program_compbuf_size = dcn31_program_compbuf_size, .init_crb = dcn31_init_crb, .hubbub_read_state = hubbub2_read_state, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub31_construct(struct dcn20_hubbub *hubbub31, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 32a6be543105..92957398ac0a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -1009,6 +1009,8 @@ static const struct hubbub_funcs hubbub32_funcs = { .force_usr_retraining_allow = hubbub32_force_usr_retraining_allow, .set_request_limit = hubbub32_set_request_limit, .get_mall_en = hubbub32_get_mall_en, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub32_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 6d41953011f5..a443722a8632 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -589,6 +589,8 @@ static const struct hubbub_funcs hubbub35_funcs = { .hubbub_read_state = hubbub2_read_state, .force_usr_retraining_allow = hubbub32_force_usr_retraining_allow, .dchubbub_init = hubbub35_init, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub35_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 92fab471b183..a36273a52880 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -1247,6 +1247,8 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .program_compbuf_segments = dcn401_program_compbuf_segments, .wait_for_det_update = dcn401_wait_for_det_update, .program_arbiter = dcn401_program_arbiter, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h index f8f991785d4f..cf2eb9793008 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h @@ -104,7 +104,8 @@ SRI(DCN_SURF1_TTU_CNTL1, HUBPREQ, id),\ SRI(DCN_CUR0_TTU_CNTL0, HUBPREQ, id),\ SRI(DCN_CUR0_TTU_CNTL1, HUBPREQ, id),\ - SRI(HUBP_CLK_CNTL, HUBP, id) + SRI(HUBP_CLK_CNTL, HUBP, id),\ + SRI(HUBPRET_READ_LINE_VALUE, HUBPRET, id) /* Register address initialization macro for ASICs with VM */ #define HUBP_REG_LIST_DCN_VM(id)\ @@ -249,7 +250,8 @@ uint32_t CURSOR_POSITION; \ uint32_t CURSOR_HOT_SPOT; \ uint32_t CURSOR_DST_OFFSET; \ - uint32_t HUBP_CLK_CNTL + uint32_t HUBP_CLK_CNTL; \ + uint32_t HUBPRET_READ_LINE_VALUE #define HUBP_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## 
post_fix @@ -622,6 +624,8 @@ type DCN_VM_SYSTEM_APERTURE_DEFAULT_SYSTEM;\ type DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB;\ type DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB;\ + type PIPE_READ_LINE;\ + type HUBP_SEG_ALLOC_ERR_STATUS;\ /* todo: get these from GVM instead of reading registers ourselves */\ type PAGE_DIRECTORY_ENTRY_HI32;\ type PAGE_DIRECTORY_ENTRY_LO32;\ @@ -671,6 +675,7 @@ struct dcn_fl_regs_st { uint32_t lut_done; uint32_t lut_addr_mode; uint32_t lut_width; + uint32_t lut_mpc_width; uint32_t lut_tmz; uint32_t lut_crossbar_sel_r; uint32_t lut_crossbar_sel_g; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h index 62369be070ea..f325db555102 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h @@ -264,6 +264,7 @@ type HUBP_3DLUT_DONE;\ type HUBP_3DLUT_ADDRESSING_MODE;\ type HUBP_3DLUT_WIDTH;\ + type HUBP_3DLUT_MPC_WIDTH;\ type HUBP_3DLUT_TMZ;\ type HUBP_3DLUT_CROSSBAR_SELECT_Y_G;\ type HUBP_3DLUT_CROSSBAR_SELECT_CB_B;\ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index 0da70b50e86d..556214b2227d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -505,6 +505,30 @@ void hubp3_init(struct hubp *hubp) hubp_reset(hubp); } +uint32_t hubp3_get_current_read_line(struct hubp *hubp) +{ + uint32_t read_line = 0; + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_GET(HUBPRET_READ_LINE_VALUE, + PIPE_READ_LINE, + &read_line); + + return read_line; +} + +unsigned int hubp3_get_underflow_status(struct hubp *hubp) +{ + uint32_t hubp_underflow = 0; + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_GET(DCHUBP_CNTL, + HUBP_UNDERFLOW_STATUS, + &hubp_underflow); + + return hubp_underflow; +} + static struct hubp_funcs dcn30_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -534,6 +558,8 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_soft_reset = hubp1_soft_reset, .hubp_set_flip_int = hubp1_set_flip_int, .hubp_clear_tiling = hubp3_clear_tiling, + .hubp_get_underflow_status = hubp3_get_underflow_status, + .hubp_get_current_read_line = hubp3_get_current_read_line, }; bool hubp3_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h index b7d7adf0b58c..842f4eb72cc8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h @@ -243,7 +243,8 @@ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_6, REFCYC_PER_META_CHUNK_FLIP_C, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_5, REFCYC_PER_VM_GROUP_VBLANK, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_6, REFCYC_PER_VM_REQ_VBLANK, mask_sh),\ - HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh) + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh) bool hubp3_construct( struct dcn20_hubp *hubp2, @@ -299,6 +300,11 @@ void hubp3_init(struct hubp *hubp); void hubp3_clear_tiling(struct hubp *hubp); +uint32_t hubp3_get_current_read_line(struct hubp *hubp); + +uint32_t hubp3_get_underflow_status(struct hubp *hubp); + + #endif /* __DC_HUBP_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c 
b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c index 7fd582a8a4ba..47101847c2b7 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c @@ -68,6 +68,18 @@ void hubp31_program_extended_blank_value( hubp31_program_extended_blank(hubp, min_dst_y_next_start_optimized); } +uint32_t hubp31_get_det_config_error(struct hubp *hubp) +{ + uint32_t config_error = 0; + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_GET(DCHUBP_CNTL, + HUBP_SEG_ALLOC_ERR_STATUS, + &config_error); + + return config_error; +} + static struct hubp_funcs dcn31_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -98,6 +110,9 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank, .hubp_clear_tiling = hubp3_clear_tiling, + .hubp_get_underflow_status = hubp3_get_underflow_status, + .hubp_get_current_read_line = hubp3_get_current_read_line, + .hubp_get_det_config_error = hubp31_get_det_config_error, }; bool hubp31_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h index d688db79b750..5952c4671507 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h @@ -228,7 +228,9 @@ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_6, REFCYC_PER_META_CHUNK_FLIP_C, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_5, REFCYC_PER_VM_GROUP_VBLANK, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_6, REFCYC_PER_VM_REQ_VBLANK, mask_sh),\ - HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh) + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_SEG_ALLOC_ERR_STATUS, mask_sh) bool hubp31_construct( @@ -246,4 +248,6 @@ void hubp31_set_unbounded_requesting(struct hubp *hubp, bool enable); void hubp31_program_extended_blank_value( struct hubp *hubp, unsigned int min_dst_y_next_start_optimized); +uint32_t hubp31_get_det_config_error(struct hubp *hubp); + #endif /* __DC_HUBP_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index f3a21c623f44..a5f23bb2a76a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -206,6 +206,9 @@ static struct hubp_funcs dcn32_hubp_funcs = { .hubp_update_mall_sel = hubp32_update_mall_sel, .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, .hubp_clear_tiling = hubp3_clear_tiling, + .hubp_get_underflow_status = hubp3_get_underflow_status, + .hubp_get_current_read_line = hubp3_get_current_read_line, + .hubp_get_det_config_error = hubp31_get_det_config_error, }; bool hubp32_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 6d060ba12da8..b140808f21af 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -218,6 +218,9 @@ static struct hubp_funcs dcn35_hubp_funcs = { .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank_value, .hubp_clear_tiling = hubp3_clear_tiling, + .hubp_get_underflow_status = hubp3_get_underflow_status, + 
.hubp_get_current_read_line = hubp3_get_current_read_line, + .hubp_get_det_config_error = hubp31_get_det_config_error, }; bool hubp35_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index 705b98b1b6cc..0fcbc6a35be6 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -127,6 +127,43 @@ void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_forma REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_FORMAT, format); } +void hubp401_program_3dlut_fl_config( + struct hubp *hubp, + struct hubp_fl_3dlut_config *cfg) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + uint32_t mpc_width = {(cfg->width == 17) ? 0 : 1}; + uint32_t width = {cfg->width}; + + if (cfg->layout == DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR) + width = (cfg->width == 17) ? 4916 : 35940; + + REG_UPDATE_2(_3DLUT_FL_CONFIG, + HUBP0_3DLUT_FL_MODE, cfg->mode, + HUBP0_3DLUT_FL_FORMAT, cfg->format); + + REG_UPDATE_2(_3DLUT_FL_BIAS_SCALE, + HUBP0_3DLUT_FL_BIAS, cfg->bias, + HUBP0_3DLUT_FL_SCALE, cfg->scale); + + REG_UPDATE(HUBP_3DLUT_ADDRESS_HIGH, + HUBP_3DLUT_ADDRESS_HIGH, cfg->address.lut3d.addr.high_part); + REG_UPDATE(HUBP_3DLUT_ADDRESS_LOW, + HUBP_3DLUT_ADDRESS_LOW, cfg->address.lut3d.addr.low_part); + + //cross bar + REG_UPDATE_8(HUBP_3DLUT_CONTROL, + HUBP_3DLUT_MPC_WIDTH, mpc_width, + HUBP_3DLUT_WIDTH, width, + HUBP_3DLUT_CROSSBAR_SELECT_CR_R, cfg->crossbar_bit_slice_cr_r, + HUBP_3DLUT_CROSSBAR_SELECT_Y_G, cfg->crossbar_bit_slice_y_g, + HUBP_3DLUT_CROSSBAR_SELECT_CB_B, cfg->crossbar_bit_slice_cb_b, + HUBP_3DLUT_ADDRESSING_MODE, cfg->addr_mode, + HUBP_3DLUT_TMZ, cfg->protection_bits, + HUBP_3DLUT_ENABLE, cfg->enabled ? 
1 : 0); +} + void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -1033,6 +1070,10 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_program_3dlut_fl_crossbar = hubp401_program_3dlut_fl_crossbar, .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done, .hubp_clear_tiling = hubp401_clear_tiling, + .hubp_program_3dlut_fl_config = hubp401_program_3dlut_fl_config, + .hubp_get_underflow_status = hubp3_get_underflow_status, + .hubp_get_current_read_line = hubp3_get_current_read_line, + .hubp_get_det_config_error = hubp31_get_det_config_error, }; bool hubp401_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h index 608e6153fa68..fdabbeec8ffa 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h @@ -252,7 +252,9 @@ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P0, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P0, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P1, mask_sh),\ - HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P1, mask_sh) + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P1, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_SEG_ALLOC_ERR_STATUS, mask_sh) void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); @@ -349,6 +351,10 @@ void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_forma void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode); +void hubp401_program_3dlut_fl_config( + struct hubp *hubp, + struct hubp_fl_3dlut_config *cfg); + void hubp401_clear_tiling(struct hubp *hubp); void hubp401_vready_at_or_After_vsync(struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 4ea13d0bf815..153d68375fa3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -671,6 +671,7 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) uint32_t early_control = 0; struct timing_generator *tg = pipe_ctx->stream_res.tg; + link_hwss->setup_stream_attribute(pipe_ctx); link_hwss->setup_stream_encoder(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); @@ -1269,7 +1270,7 @@ void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) pipe_ctx->stream_res.stream_enc->funcs->set_avmute(pipe_ctx->stream_res.stream_enc, enable); } -static enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id) +enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id) { switch (crtc_id) { case CONTROLLER_ID_D0: @@ -1289,7 +1290,7 @@ static enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id) } } -static void populate_audio_dp_link_info( +void populate_audio_dp_link_info( const struct pipe_ctx *pipe_ctx, struct audio_dp_link_info *dp_link_info) { @@ -1600,19 +1601,17 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( } if (pipe_ctx->stream_res.audio != NULL) { - struct audio_output audio_output = {0}; + build_audio_output(context, pipe_ctx, &pipe_ctx->stream_res.audio_output); - build_audio_output(context, pipe_ctx, &audio_output); - - link_hwss->setup_audio_output(pipe_ctx, &audio_output, + 
link_hwss->setup_audio_output(pipe_ctx, &pipe_ctx->stream_res.audio_output, pipe_ctx->stream_res.audio->inst); pipe_ctx->stream_res.audio->funcs->az_configure( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, - &audio_output.crtc_info, + &pipe_ctx->stream_res.audio_output.crtc_info, &pipe_ctx->stream->audio_info, - &audio_output.dp_link_info); + &pipe_ctx->stream_res.audio_output.dp_link_info); if (dc->config.disable_hbr_audio_dp2) if (pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio && @@ -2254,7 +2253,7 @@ static bool should_enable_fbc(struct dc *dc, /* * Enable FBC */ -static void enable_fbc( +void enable_fbc( struct dc *dc, struct dc_state *context) { @@ -2386,9 +2385,7 @@ static void dce110_setup_audio_dto( if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A) continue; if (pipe_ctx->stream_res.audio != NULL) { - struct audio_output audio_output; - - build_audio_output(context, pipe_ctx, &audio_output); + build_audio_output(context, pipe_ctx, &pipe_ctx->stream_res.audio_output); if (dc->res_pool->dccg && dc->res_pool->dccg->funcs->set_audio_dtbclk_dto) { struct dtbclk_dto_params dto_params = {0}; @@ -2399,14 +2396,14 @@ static void dce110_setup_audio_dto( pipe_ctx->stream_res.audio->funcs->wall_dto_setup( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, - &audio_output.crtc_info, - &audio_output.pll_info); + &pipe_ctx->stream_res.audio_output.crtc_info, + &pipe_ctx->stream_res.audio_output.pll_info); } else pipe_ctx->stream_res.audio->funcs->wall_dto_setup( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, - &audio_output.crtc_info, - &audio_output.pll_info); + &pipe_ctx->stream_res.audio_output.crtc_info, + &pipe_ctx->stream_res.audio_output.pll_info); break; } } @@ -2426,15 +2423,15 @@ static void dce110_setup_audio_dto( continue; if (pipe_ctx->stream_res.audio != NULL) { - struct audio_output audio_output = {0}; - - build_audio_output(context, pipe_ctx, &audio_output); + build_audio_output(context, + pipe_ctx, + &pipe_ctx->stream_res.audio_output); pipe_ctx->stream_res.audio->funcs->wall_dto_setup( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, - &audio_output.crtc_info, - &audio_output.pll_info); + &pipe_ctx->stream_res.audio_output.crtc_info, + &pipe_ctx->stream_res.audio_output.pll_info); break; } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h index 7cd8c1576988..9c032e449481 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h @@ -114,5 +114,12 @@ void build_audio_output( struct dc_state *state, const struct pipe_ctx *pipe_ctx, struct audio_output *audio_output); +enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id); +void populate_audio_dp_link_info( + const struct pipe_ctx *pipe_ctx, + struct audio_dp_link_info *dp_link_info); +void enable_fbc( + struct dc *dc, + struct dc_state *context); #endif /* __DC_HWSS_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 39910f73ecd0..506c3bbbf221 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -328,19 +328,25 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx) } DTN_INFO("\n=======HUBP FL======\n"); - DTN_INFO( - "HUBP FL: Enabled Done adr_mode width tmz xbar_sel_R xbar_sel_G xbar_sel_B adr_hi adr_low REFCYC Bias Scale Mode 
Format\n"); + static const char * const pLabels[] = { + "inst", "Enabled ", "Done ", "adr_mode ", "width ", "mpc_width ", + "tmz", "xbar_sel_R", "xbar_sel_G", "xbar_sel_B", "adr_hi ", + "adr_low", "REFCYC", "Bias", "Scale", "Mode", + "Format", "prefetch"}; + for (i = 0; i < pool->pipe_count; i++) { struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state); struct dcn_fl_regs_st *fl_regs = &s->fl_regs; + struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr; if (!s->blank_en) { - DTN_INFO("[%2d]: %5xh %6xh %5d %6d %8xh %2xh %6xh %6d %8d %8d %7d %8xh %5x %5x %5x", + uint32_t values[] = { pool->hubps[i]->inst, fl_regs->lut_enable, fl_regs->lut_done, fl_regs->lut_addr_mode, fl_regs->lut_width, + fl_regs->lut_mpc_width, fl_regs->lut_tmz, fl_regs->lut_crossbar_sel_r, fl_regs->lut_crossbar_sel_g, @@ -351,8 +357,13 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx) fl_regs->lut_fl_bias, fl_regs->lut_fl_scale, fl_regs->lut_fl_mode, - fl_regs->lut_fl_format); - DTN_INFO("\n"); + fl_regs->lut_fl_format, + dlg_regs->dst_y_prefetch}; + + int num_elements = 18; + + for (int j = 0; j < num_elements; j++) + DTN_INFO("%s \t %8xh\n", pLabels[j], values[j]); } } @@ -541,19 +552,43 @@ static void dcn10_log_color_state(struct dc *dc, dc->caps.color.mpc.ogam_ram, dc->caps.color.mpc.ocsc); DTN_INFO("===== MPC RMCM 3DLUT =====\n"); - DTN_INFO("MPCC: SIZE MODE MODE_CUR RD_SEL 30BIT_EN WR_EN_MASK RAM_SEL OUT_NORM_FACTOR FL_SEL OUT_OFFSET OUT_SCALE FL_DONE SOFT_UNDERFLOW HARD_UNDERFLOW MEM_PWR_ST FORCE DIS MODE\n"); + static const char * const pLabels[] = { + "MPCC", "SIZE", "MODE", "MODE_CUR", "RD_SEL", + "30BIT_EN", "WR_EN_MASK", "RAM_SEL", "OUT_NORM_FACTOR", "FL_SEL", + "OUT_OFFSET", "OUT_SCALE", "FL_DONE", "SOFT_UNDERFLOW", "HARD_UNDERFLOW", + "MEM_PWR_ST", "FORCE", "DIS", "MODE"}; + for (i = 0; i < pool->mpcc_count; i++) { struct mpcc_state s = {0}; pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s); - if (s.opp_id != 0xf) - DTN_INFO("[%2d]: %4xh %4xh %6xh %4x %4x %4x %4x %4x %4xh %4xh %6xh %4x %4x %4x %4x %4x %4x %4x\n", - i, s.rmcm_regs.rmcm_3dlut_size, s.rmcm_regs.rmcm_3dlut_mode, s.rmcm_regs.rmcm_3dlut_mode_cur, - s.rmcm_regs.rmcm_3dlut_read_sel, s.rmcm_regs.rmcm_3dlut_30bit_en, s.rmcm_regs.rmcm_3dlut_wr_en_mask, - s.rmcm_regs.rmcm_3dlut_ram_sel, s.rmcm_regs.rmcm_3dlut_out_norm_factor, s.rmcm_regs.rmcm_3dlut_fl_sel, - s.rmcm_regs.rmcm_3dlut_out_offset_r, s.rmcm_regs.rmcm_3dlut_out_scale_r, s.rmcm_regs.rmcm_3dlut_fl_done, - s.rmcm_regs.rmcm_3dlut_fl_soft_underflow, s.rmcm_regs.rmcm_3dlut_fl_hard_underflow, s.rmcm_regs.rmcm_3dlut_mem_pwr_state, - s.rmcm_regs.rmcm_3dlut_mem_pwr_force, s.rmcm_regs.rmcm_3dlut_mem_pwr_dis, s.rmcm_regs.rmcm_3dlut_mem_pwr_mode); + if (s.opp_id != 0xf) { + uint32_t values[] = { + i, + s.rmcm_regs.rmcm_3dlut_size, + s.rmcm_regs.rmcm_3dlut_mode, + s.rmcm_regs.rmcm_3dlut_mode_cur, + s.rmcm_regs.rmcm_3dlut_read_sel, + s.rmcm_regs.rmcm_3dlut_30bit_en, + s.rmcm_regs.rmcm_3dlut_wr_en_mask, + s.rmcm_regs.rmcm_3dlut_ram_sel, + s.rmcm_regs.rmcm_3dlut_out_norm_factor, + s.rmcm_regs.rmcm_3dlut_fl_sel, + s.rmcm_regs.rmcm_3dlut_out_offset_r, + s.rmcm_regs.rmcm_3dlut_out_scale_r, + s.rmcm_regs.rmcm_3dlut_fl_done, + s.rmcm_regs.rmcm_3dlut_fl_soft_underflow, + s.rmcm_regs.rmcm_3dlut_fl_hard_underflow, + s.rmcm_regs.rmcm_3dlut_mem_pwr_state, + s.rmcm_regs.rmcm_3dlut_mem_pwr_force, + s.rmcm_regs.rmcm_3dlut_mem_pwr_dis, + s.rmcm_regs.rmcm_3dlut_mem_pwr_mode}; + + int num_elements = 19; + + for (int j = 0; j < num_elements; j++) + DTN_INFO("%s \t %8xh\n", pLabels[j], 
values[j]); + } } DTN_INFO("\n"); DTN_INFO("===== MPC RMCM Shaper =====\n"); @@ -3628,6 +3663,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) int y_plane = pipe_ctx->plane_state->dst_rect.y; int x_pos = pos_cpy.x; int y_pos = pos_cpy.y; + int clip_x = pipe_ctx->plane_state->clip_rect.x; + int clip_width = pipe_ctx->plane_state->clip_rect.width; if ((pipe_ctx->top_pipe != NULL) || (pipe_ctx->bottom_pipe != NULL)) { if ((pipe_ctx->plane_state->src_rect.width != pipe_ctx->plane_res.scl_data.viewport.width) || @@ -3646,7 +3683,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) */ /** - * Translate cursor from stream space to plane space. + * Translate cursor and clip offset from stream space to plane space. * * If the cursor is scaled then we need to scale the position * to be in the approximately correct place. We can't do anything @@ -3663,6 +3700,10 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_state->dst_rect.width; y_pos = (y_pos - y_plane) * pipe_ctx->plane_state->src_rect.height / pipe_ctx->plane_state->dst_rect.height; + clip_x = (clip_x - x_plane) * pipe_ctx->plane_state->src_rect.width / + pipe_ctx->plane_state->dst_rect.width; + clip_width = clip_width * pipe_ctx->plane_state->src_rect.width / + pipe_ctx->plane_state->dst_rect.width; } /** @@ -3709,30 +3750,18 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) if (param.rotation == ROTATION_ANGLE_0) { - int viewport_width = - pipe_ctx->plane_res.scl_data.viewport.width; - int viewport_x = - pipe_ctx->plane_res.scl_data.viewport.x; if (param.mirror) { - if (pipe_split_on || odm_combine_on) { - if (pos_cpy.x >= viewport_width + viewport_x) { - pos_cpy.x = 2 * viewport_width - - pos_cpy.x + 2 * viewport_x; - } else { - uint32_t temp_x = pos_cpy.x; - - pos_cpy.x = 2 * viewport_x - pos_cpy.x; - if (temp_x >= viewport_x + - (int)hubp->curs_attr.width || pos_cpy.x - <= (int)hubp->curs_attr.width + - pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; - } - } - } else { - pos_cpy.x = viewport_width - pos_cpy.x + 2 * viewport_x; - } + /* + * The plane is split into multiple viewports. + * The combination of all viewports span the + * entirety of the clip rect. + * + * For no pipe_split, viewport_width is represents + * the full width of the clip_rect, so we can just + * mirror it. + */ + pos_cpy.x = clip_width - pos_cpy.x + 2 * clip_x; } } // Swap axis and mirror horizontally @@ -3802,30 +3831,17 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) } // Mirror horizontally and vertically else if (param.rotation == ROTATION_ANGLE_180) { - int viewport_width = - pipe_ctx->plane_res.scl_data.viewport.width; - int viewport_x = - pipe_ctx->plane_res.scl_data.viewport.x; - if (!param.mirror) { - if (pipe_split_on || odm_combine_on) { - if (pos_cpy.x >= viewport_width + viewport_x) { - pos_cpy.x = 2 * viewport_width - - pos_cpy.x + 2 * viewport_x; - } else { - uint32_t temp_x = pos_cpy.x; - - pos_cpy.x = 2 * viewport_x - pos_cpy.x; - if (temp_x >= viewport_x + - (int)hubp->curs_attr.width || pos_cpy.x - <= (int)hubp->curs_attr.width + - pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; - } - } - } else { - pos_cpy.x = viewport_width - pos_cpy.x + 2 * viewport_x; - } + /* + * The plane is split into multiple viewports. + * The combination of all viewports span the + * entirety of the clip rect. + * + * For no pipe_split, viewport_width is represents + * the full width of the clip_rect, so we can just + * mirror it. 
+ */ + pos_cpy.x = clip_width - pos_cpy.x + 2 * clip_x; } /** diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 3207addbd4eb..cc377fcda6ff 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -955,7 +955,7 @@ enum dc_status dcn20_enable_stream_timing( return DC_ERROR_UNEXPECTED; } - fsleep(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz)); + udelay(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz)); params.vertical_total_min = stream->adjust.v_total_min; params.vertical_total_max = stream->adjust.v_total_max; @@ -3054,6 +3054,8 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) link_enc->transmitter - TRANSMITTER_UNIPHY_A); } + link_hwss->setup_stream_attribute(pipe_ctx); + if (dc->res_pool->dccg->funcs->set_pixel_rate_div) dc->res_pool->dccg->funcs->set_pixel_rate_div( dc->res_pool->dccg, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 37a239219dfe..139a63101488 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -1228,3 +1228,51 @@ void dcn30_wait_for_all_pending_updates(const struct pipe_ctx *pipe_ctx) } } } + +void dcn30_get_underflow_debug_data(const struct dc *dc, + struct timing_generator *tg, + struct dc_underflow_debug_data *out_data) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + if (tg) { + uint32_t v_blank_start = 0, v_blank_end = 0; + + out_data->otg_inst = tg->inst; + + tg->funcs->get_scanoutpos(tg, + &v_blank_start, + &v_blank_end, + &out_data->h_position, + &out_data->v_position); + + out_data->otg_frame_count = tg->funcs->get_frame_count(tg); + + out_data->otg_underflow = tg->funcs->is_optc_underflow_occurred(tg); + } + + for (int i = 0; i < MAX_PIPES; i++) { + struct hubp *hubp = dc->res_pool->hubps[i]; + + if (hubp) { + if (hubp->funcs->hubp_get_underflow_status) + out_data->hubps[i].hubp_underflow = hubp->funcs->hubp_get_underflow_status(hubp); + + if (hubp->funcs->hubp_in_blank) + out_data->hubps[i].hubp_in_blank = hubp->funcs->hubp_in_blank(hubp); + + if (hubp->funcs->hubp_get_current_read_line) + out_data->hubps[i].hubp_readline = hubp->funcs->hubp_get_current_read_line(hubp); + + if (hubp->funcs->hubp_get_det_config_error) + out_data->hubps[i].det_config_error = hubp->funcs->hubp_get_det_config_error(hubp); + } + } + + if (hubbub->funcs->get_det_sizes) + hubbub->funcs->get_det_sizes(hubbub, out_data->curr_det_sizes, out_data->target_det_sizes); + + if (hubbub->funcs->compbuf_config_error) + out_data->compbuf_config_error = hubbub->funcs->compbuf_config_error(hubbub); + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h index 4b90b781c4f2..40afbbfb5b9c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h @@ -29,6 +29,7 @@ #include "hw_sequencer_private.h" struct dc; +struct dc_underflow_debug_data; void dcn30_init_hw(struct dc *dc); void dcn30_program_all_writeback_pipes_in_tree( @@ -98,4 +99,8 @@ void dcn30_prepare_bandwidth(struct dc *dc, void dcn30_wait_for_all_pending_updates(const struct pipe_ctx *pipe_ctx); +void dcn30_get_underflow_debug_data(const struct dc *dc, + struct timing_generator 
*tg, + struct dc_underflow_debug_data *out_data); + #endif /* __DC_HWSS_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 2ac5d54d1626..d7ff55669bac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -110,6 +110,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .update_visual_confirm_color = dcn10_update_visual_confirm_color, .is_abm_supported = dcn21_is_abm_supported, .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn30_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 556f4fe57eda..5a6a459da224 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -112,6 +112,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn31_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index e68f21fd5f0f..560984533950 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -528,3 +528,75 @@ void dcn314_disable_link_output(struct dc_link *link, apply_symclk_on_tx_off_wa(link); } + +/** + * dcn314_dpp_pg_control - DPP power gate control. + * + * @hws: dce_hwseq reference. + * @dpp_inst: DPP instance reference. + * @power_on: true if we want to enable power gate, false otherwise. + * + * Enable or disable power gate in the specific DPP instance. + * If power gating is disabled, will force disable cursor in the DPP instance. + */ +void dcn314_dpp_pg_control( + struct dce_hwseq *hws, + unsigned int dpp_inst, + bool power_on) +{ + uint32_t power_gate = power_on ? 0 : 1; + uint32_t pwr_status = power_on ? 
0 : 2; + + + if (hws->ctx->dc->debug.disable_dpp_power_gate) { + /* Workaround for DCN314 with disabled power gating */ + if (!power_on) { + + /* Force disable cursor if power gating is disabled */ + struct dpp *dpp = hws->ctx->dc->res_pool->dpps[dpp_inst]; + if (dpp && dpp->funcs->dpp_force_disable_cursor) + dpp->funcs->dpp_force_disable_cursor(dpp); + } + return; + } + if (REG(DOMAIN1_PG_CONFIG) == 0) + return; + + switch (dpp_inst) { + case 0: /* DPP0 */ + REG_UPDATE(DOMAIN1_PG_CONFIG, + DOMAIN1_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN1_PG_STATUS, + DOMAIN1_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 1: /* DPP1 */ + REG_UPDATE(DOMAIN3_PG_CONFIG, + DOMAIN3_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN3_PG_STATUS, + DOMAIN3_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 2: /* DPP2 */ + REG_UPDATE(DOMAIN5_PG_CONFIG, + DOMAIN5_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN5_PG_STATUS, + DOMAIN5_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 3: /* DPP3 */ + REG_UPDATE(DOMAIN7_PG_CONFIG, + DOMAIN7_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN7_PG_STATUS, + DOMAIN7_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h index 2305ad282f21..6c072d0274ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h @@ -47,4 +47,6 @@ void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, void dcn314_disable_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal); +void dcn314_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on); + #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index f5112742edf9..79faab1125d4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -115,6 +115,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .update_visual_confirm_color = dcn10_update_visual_confirm_color, .calculate_pix_rate_divider = dcn314_calculate_pix_rate_divider, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn314_private_funcs = { @@ -141,6 +142,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .enable_power_gating_plane = dcn314_enable_power_gating_plane, .dpp_root_clock_control = dcn314_dpp_root_clock_control, .hubp_pg_control = dcn31_hubp_pg_control, + .dpp_pg_control = dcn314_dpp_pg_control, .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, .update_odm = dcn314_update_odm, .dsc_pg_control = dcn314_dsc_pg_control, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index b971356d30b1..c19ef075c882 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -121,6 +121,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, .program_outstanding_updates = dcn32_program_outstanding_updates, .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, + 
.get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index a267f574b619..764eff6a4ec6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -113,6 +113,14 @@ static void enable_memory_low_power(struct dc *dc) } #endif +static void print_pg_status(struct dc *dc, const char *debug_func, const char *debug_log) +{ + if (dc->debug.enable_pg_cntl_debug_logs && dc->res_pool->pg_cntl) { + if (dc->res_pool->pg_cntl->funcs->print_pg_status) + dc->res_pool->pg_cntl->funcs->print_pg_status(dc->res_pool->pg_cntl, debug_func, debug_log); + } +} + void dcn35_set_dmu_fgcg(struct dce_hwseq *hws, bool enable) { REG_UPDATE_3(DMU_CLK_CNTL, @@ -137,6 +145,8 @@ void dcn35_init_hw(struct dc *dc) uint32_t user_level = MAX_BACKLIGHT_LEVEL; int i; + print_pg_status(dc, __func__, ": start"); + if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -200,10 +210,7 @@ void dcn35_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc->link_srv->blank_all_dp_displays(dc); -/* - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); -*/ + if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init) res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub); /* If taking control over from VBIOS, we may want to optimize our first @@ -236,6 +243,8 @@ void dcn35_init_hw(struct dc *dc) } hws->funcs.init_pipes(dc, dc->current_state); + print_pg_status(dc, __func__, ": after init_pipes"); + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control && !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, @@ -312,6 +321,7 @@ void dcn35_init_hw(struct dc *dc) if (dc->res_pool->pg_cntl->funcs->init_pg_status) dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl); } + print_pg_status(dc, __func__, ": after init_pg_status"); } static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) @@ -500,97 +510,6 @@ void dcn35_physymclk_root_clock_control(struct dce_hwseq *hws, unsigned int phy_ } } -void dcn35_dsc_pg_control( - struct dce_hwseq *hws, - unsigned int dsc_inst, - bool power_on) -{ - uint32_t power_gate = power_on ? 0 : 1; - uint32_t pwr_status = power_on ? 
0 : 2; - uint32_t org_ip_request_cntl = 0; - - if (hws->ctx->dc->debug.disable_dsc_power_gate) - return; - if (hws->ctx->dc->debug.ignore_pg) - return; - REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - - switch (dsc_inst) { - case 0: /* DSC0 */ - REG_UPDATE(DOMAIN16_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN16_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 1: /* DSC1 */ - REG_UPDATE(DOMAIN17_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN17_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 2: /* DSC2 */ - REG_UPDATE(DOMAIN18_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN18_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 3: /* DSC3 */ - REG_UPDATE(DOMAIN19_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN19_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - default: - BREAK_TO_DEBUGGER(); - break; - } - - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); -} - -void dcn35_enable_power_gating_plane(struct dce_hwseq *hws, bool enable) -{ - bool force_on = true; /* disable power gating */ - uint32_t org_ip_request_cntl = 0; - - if (hws->ctx->dc->debug.disable_hubp_power_gate) - return; - if (hws->ctx->dc->debug.ignore_pg) - return; - REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - /* DCHUBP0/1/2/3/4/5 */ - REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - /* DPP0/1/2/3/4/5 */ - REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - - force_on = true; /* disable power gating */ - if (enable && !hws->ctx->dc->debug.disable_dsc_power_gate) - force_on = false; - - /* DCS0/1/2/3/4 */ - REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - - -} - /* In headless boot cases, DIG may be turned * on which causes HW/SW discrepancies. 
* To avoid this, power down hardware on boot @@ -1453,6 +1372,8 @@ void dcn35_prepare_bandwidth( } dcn20_prepare_bandwidth(dc, context); + + print_pg_status(dc, __func__, ": after rcg and power up"); } void dcn35_optimize_bandwidth( @@ -1461,6 +1382,8 @@ void dcn35_optimize_bandwidth( { struct pg_block_update pg_update_state; + print_pg_status(dc, __func__, ": before rcg and power up"); + dcn20_optimize_bandwidth(dc, context); if (dc->hwss.calc_blocks_to_gate) { @@ -1472,6 +1395,8 @@ void dcn35_optimize_bandwidth( if (dc->hwss.root_clock_control) dc->hwss.root_clock_control(dc, &pg_update_state, false); } + + print_pg_status(dc, __func__, ": after rcg and power up"); } void dcn35_set_drr(struct pipe_ctx **pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index a3ccf805bd16..f2f16a0bdb4f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -115,7 +115,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, - .update_dsc_pg = dcn32_update_dsc_pg, .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, .hw_block_power_up = dcn35_hw_block_power_up, @@ -128,6 +127,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .enable_plane = dcn20_enable_plane, .update_dchubp_dpp = dcn20_update_dchubp_dpp, .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn35_private_funcs = { @@ -150,7 +150,6 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .plane_atomic_disable = dcn35_plane_atomic_disable, //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ //.hubp_pg_control = dcn35_hubp_pg_control, - .enable_power_gating_plane = dcn35_enable_power_gating_plane, .dpp_root_clock_control = dcn35_dpp_root_clock_control, .dpstream_root_clock_control = dcn35_dpstream_root_clock_control, .physymclk_root_clock_control = dcn35_physymclk_root_clock_control, @@ -165,7 +164,6 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, .resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio, .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, - .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 58f2be2a326b..09e60158f0b5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -114,7 +114,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, - .update_dsc_pg = dcn32_update_dsc_pg, .calc_blocks_to_gate = dcn351_calc_blocks_to_gate, .calc_blocks_to_ungate = dcn351_calc_blocks_to_ungate, .hw_block_power_up = dcn351_hw_block_power_up, @@ -123,6 +122,7 
@@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn351_private_funcs = { @@ -145,7 +145,6 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .plane_atomic_disable = dcn35_plane_atomic_disable, //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ //.hubp_pg_control = dcn35_hubp_pg_control, - .enable_power_gating_plane = dcn35_enable_power_gating_plane, .dpp_root_clock_control = dcn35_dpp_root_clock_control, .dpstream_root_clock_control = dcn35_dpstream_root_clock_control, .physymclk_root_clock_control = dcn35_physymclk_root_clock_control, @@ -159,7 +158,6 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, - .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index cc9f40d97af2..d5b5e2ce6ff6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -965,6 +965,8 @@ void dcn401_enable_stream(struct pipe_ctx *pipe_ctx) } } + link_hwss->setup_stream_attribute(pipe_ctx); + if (dc->res_pool->dccg->funcs->set_pixel_rate_div) { dc->res_pool->dccg->funcs->set_pixel_rate_div( dc->res_pool->dccg, @@ -1619,20 +1621,28 @@ void dcn401_unblank_stream(struct pipe_ctx *pipe_ctx, void dcn401_hardware_release(struct dc *dc) { - dc_dmub_srv_fams2_update_config(dc, dc->current_state, false); - - /* If pstate unsupported, or still supported - * by firmware, force it supported by dcn - */ - if (dc->current_state) { - if ((!dc->clk_mgr->clks.p_state_change_support || - dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) && - dc->res_pool->hubbub->funcs->force_pstate_change_control) - dc->res_pool->hubbub->funcs->force_pstate_change_control( - dc->res_pool->hubbub, true, true); - - dc->current_state->bw_ctx.bw.dcn.clk.p_state_change_support = true; - dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, dc->current_state, true); + if (!dc->debug.disable_force_pstate_allow_on_hw_release) { + dc_dmub_srv_fams2_update_config(dc, dc->current_state, false); + + /* If pstate unsupported, or still supported + * by firmware, force it supported by dcn + */ + if (dc->current_state) { + if ((!dc->clk_mgr->clks.p_state_change_support || + dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) && + dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, true, true); + + dc->current_state->bw_ctx.bw.dcn.clk.p_state_change_support = true; + dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, dc->current_state, true); + } + } else { + if (dc->current_state) { + dc->clk_mgr->clks.p_state_change_support = false; + dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, dc->current_state, true); + } + dc_dmub_srv_fams2_update_config(dc, dc->current_state, false); } } diff --git 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index fe7aceb2f510..d6e11b7e4fce 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -104,6 +104,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .enable_plane = dcn20_enable_plane, .update_dchubp_dpp = dcn20_update_dchubp_dpp, .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn401_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 9df8030e37f7..1723bbcf2c46 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -47,6 +47,7 @@ struct link_resource; struct dc_dmub_cmd; struct pg_block_update; struct drr_params; +struct dc_underflow_debug_data; struct subvp_pipe_control_lock_fast_params { struct dc *dc; @@ -475,6 +476,9 @@ struct hw_sequencer_funcs { struct dc_state *context); void (*post_unlock_reset_opp)(struct dc *dc, struct pipe_ctx *opp_head); + void (*get_underflow_debug_data)(const struct dc *dc, + struct timing_generator *tg, + struct dc_underflow_debug_data *out_data); }; void color_space_to_black_color( diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index f0d7185153b2..d30f94c35f11 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -228,8 +228,7 @@ struct resource_funcs { enum dc_status (*update_dc_state_for_encoder_switch)(struct dc_link *link, struct dc_link_settings *link_setting, uint8_t pipe_count, - struct pipe_ctx *pipes, - struct audio_output *audio_output); + struct pipe_ctx *pipes); }; struct audio_support{ @@ -361,6 +360,8 @@ struct stream_resource { uint8_t gsl_group; struct test_pattern_params test_pattern_params; + + struct audio_output audio_output; }; struct plane_resource { @@ -433,7 +434,7 @@ enum p_state_switch_method { P_STATE_V_ACTIVE, P_STATE_SUB_VP, P_STATE_DRR_SUB_VP, - P_STATE_V_BLANK_SUB_VP + P_STATE_V_BLANK_SUB_VP, }; struct pipe_ctx { @@ -683,6 +684,7 @@ struct replay_context { /* Controller Id used for Dig Fe source select */ enum controller_id controllerId; unsigned int line_time_in_ns; + bool os_request_force_ffu; }; enum dc_replay_enable { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 52b745667ef7..843a18287c83 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -137,6 +137,19 @@ struct dcn_hubbub_state { uint32_t dram_state_cntl; }; +struct hubbub_system_latencies { + uint32_t max_latency_ns; + uint32_t avg_latency_ns; + uint32_t min_latency_ns; +}; + +struct hubbub_urgent_latency_params { + uint32_t refclk_mhz; + uint32_t t_win_ns; + uint32_t bandwidth_mbps; + uint32_t bw_factor_x1000; +}; + struct hubbub_funcs { void (*update_dchub)( struct hubbub *hubbub, @@ -229,6 +242,17 @@ struct hubbub_funcs { void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower); + void (*get_det_sizes)(struct 
hubbub *hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes); + uint32_t (*compbuf_config_error)(struct hubbub *hubbub); + struct hubbub_perfmon_funcs{ + void (*start_system_latency_measurement)(struct hubbub *hubbub); + void (*get_system_latency_result)(struct hubbub *hubbub, uint32_t refclk_mhz, struct hubbub_system_latencies *latencies); + void (*start_in_order_bandwidth_measurement)(struct hubbub *hubbub); + void (*get_in_order_bandwidth_result)(struct hubbub *hubbub, uint32_t refclk_mhz, uint32_t *bandwidth_mbps); + void (*start_urgent_ramp_latency_measurement)(struct hubbub *hubbub, const struct hubbub_urgent_latency_params *params); + void (*get_urgent_ramp_latency_result)(struct hubbub *hubbub, uint32_t refclk_mhz, uint32_t *latency_ns); + void (*reset)(struct hubbub *hubbub); + } perfmon; }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 0c5675d1c593..1b7c085dc2cc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -349,6 +349,9 @@ struct dpp_funcs { struct dpp *dpp_base, enum dc_color_space color_space, struct dc_csc_transform cursor_csc_color_matrix); + + void (*dpp_force_disable_cursor)(struct dpp *dpp_base); + }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index cee29e89ec5c..2b874d2cc61c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -89,7 +89,7 @@ enum hubp_3dlut_fl_addressing_mode { enum hubp_3dlut_fl_width { hubp_3dlut_fl_width_17 = 17, hubp_3dlut_fl_width_33 = 33, - hubp_3dlut_fl_width_transformed = 4916 + hubp_3dlut_fl_width_transformed = 4916, //mpc default }; enum hubp_3dlut_fl_crossbar_bit_slice { @@ -99,6 +99,22 @@ enum hubp_3dlut_fl_crossbar_bit_slice { hubp_3dlut_fl_crossbar_bit_slice_48_63 = 3 }; +struct hubp_fl_3dlut_config { + bool enabled; + enum hubp_3dlut_fl_width width; + enum hubp_3dlut_fl_mode mode; + enum hubp_3dlut_fl_format format; + uint16_t bias; + uint16_t scale; + struct dc_plane_address address; + enum hubp_3dlut_fl_addressing_mode addr_mode; + enum dc_cm2_gpu_mem_layout layout; + uint8_t protection_bits; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r; +}; + struct hubp { const struct hubp_funcs *funcs; struct dc_context *ctx; @@ -288,7 +304,10 @@ struct hubp_funcs { enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r); int (*hubp_get_3dlut_fl_done)(struct hubp *hubp); + void (*hubp_program_3dlut_fl_config)(struct hubp *hubp, struct hubp_fl_3dlut_config *cfg); void (*hubp_clear_tiling)(struct hubp *hubp); + uint32_t (*hubp_get_current_read_line)(struct hubp *hubp); + uint32_t (*hubp_get_det_config_error)(struct hubp *hubp); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 7641439f6ca0..22960ee03dee 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -115,6 +115,16 @@ enum MCM_LUT_ID { MCM_LUT_SHAPER }; +struct mpc_fl_3dlut_config { + bool enabled; + uint16_t width; + bool select_lut_bank_a; + uint16_t bit_depth; + int hubp_index; + uint16_t bias; + uint16_t scale; +}; + union mcm_lut_params { const struct pwl_params *pwl; const struct tetrahedral_params 
*lut3d; @@ -1059,21 +1069,6 @@ struct mpc_funcs { */ void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_ID id, const enum MCM_LUT_XABLE xable, bool lut_bank_a, int mpcc_id); - /** - * @program_3dlut_size: - * - * Program 3D LUT size. - * - * Parameters: - * - [in/out] mpc - MPC context. - * - [in] is_17x17x17 - is 3dlut 17x17x17 - * - [in] mpcc_id - * - * Return: - * - * void - */ - void (*program_3dlut_size)(struct mpc *mpc, bool is_17x17x17, int mpcc_id); /** * @mcm: @@ -1098,6 +1093,7 @@ struct mpc_funcs { * MPC RMCM new HW sequential programming functions */ struct { + void (*fl_3dlut_configure)(struct mpc *mpc, struct mpc_fl_3dlut_config *cfg, int mpcc_id); void (*enable_3dlut_fl)(struct mpc *mpc, bool enable, int mpcc_id); void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx); void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index 44f86cc2d1d6..227e3f8d7e5f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -49,6 +49,7 @@ struct pg_cntl_funcs { void (*mem_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*dio_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*init_pg_status)(struct pg_cntl *pg_cntl); + void (*print_pg_status)(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log); }; #endif //__DC_PG_CNTL_H__ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 267ace4eef8a..f2de2cf23859 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -374,6 +374,7 @@ struct timing_generator_funcs { void (*wait_drr_doublebuffer_pending_clear)(struct timing_generator *tg); void (*set_long_vtotal)(struct timing_generator *optc, const struct long_vtotal_params *params); void (*wait_odm_doublebuffer_pending_clear)(struct timing_generator *tg); + void (*wait_otg_disable)(struct timing_generator *optc); bool (*get_optc_double_buffer_pending)(struct timing_generator *tg); bool (*get_otg_double_buffer_pending)(struct timing_generator *tg); bool (*get_pipe_update_pending)(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index f2503402c10e..0cce49d95e26 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -218,7 +218,10 @@ struct link_service { bool (*dp_overwrite_extended_receiver_cap)(struct dc_link *link); enum lttpr_mode (*dp_decide_lttpr_mode)(struct dc_link *link, struct dc_link_settings *link_setting); - + uint8_t (*dp_get_lttpr_count)(struct dc_link *link); + void (*edp_get_alpm_support)(struct dc_link *link, + bool *auxless_support, + bool *auxwake_support); /*************************** DP DPIA/PHY ******************************/ void (*dpia_handle_usb4_bandwidth_allocation_for_link)( diff --git a/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h new file mode 100644 index 000000000000..23daf98b8aa8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. 
+ +#ifndef __SOC_AND_IP_TRANSLATOR_H__ +#define __SOC_AND_IP_TRANSLATOR_H__ + +#include "dc.h" +#include "dml_top_soc_parameter_types.h" + +struct soc_and_ip_translator_funcs { + void (*get_soc_bb)(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config); + void (*get_ip_caps)(struct dml2_ip_capabilities *dml_ip_caps); +}; + +struct soc_and_ip_translator { + const struct soc_and_ip_translator_funcs *translator_funcs; +}; + +struct soc_and_ip_translator *dc_create_soc_and_ip_translator(enum dce_version dc_version); +void dc_destroy_soc_and_ip_translator(struct soc_and_ip_translator **soc_and_ip_translator); + + +#endif // __SOC_AND_IP_TRANSLATOR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 2956c2b3ad1a..23f41c99fa38 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -75,7 +75,9 @@ static void dp_retrain_link_dp_test(struct dc_link *link, bool is_hpo_acquired; uint8_t count; int i; - struct audio_output audio_output[MAX_PIPES]; + + struct dc_stream_state *streams_on_link[MAX_PIPES]; + int num_streams_on_link = 0; needs_divider_update = (link->dc->link_srv->dp_get_encoding_format(link_setting) != link->dc->link_srv->dp_get_encoding_format((const struct dc_link_settings *) &link->cur_link_settings)); @@ -99,7 +101,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, if (needs_divider_update && link->dc->res_pool->funcs->update_dc_state_for_encoder_switch) { link->dc->res_pool->funcs->update_dc_state_for_encoder_switch(link, link_setting, count, - *pipes, &audio_output[0]); + *pipes); for (i = 0; i < count; i++) { pipes[i]->clock_source->funcs->program_pix_clk( pipes[i]->clock_source, @@ -111,15 +113,16 @@ static void dp_retrain_link_dp_test(struct dc_link *link, const struct link_hwss *link_hwss = get_link_hwss( link, &pipes[i]->link_res); - link_hwss->setup_audio_output(pipes[i], &audio_output[i], - pipes[i]->stream_res.audio->inst); + link_hwss->setup_audio_output(pipes[i], + &pipes[i]->stream_res.audio_output, + pipes[i]->stream_res.audio->inst); pipes[i]->stream_res.audio->funcs->az_configure( pipes[i]->stream_res.audio, pipes[i]->stream->signal, - &audio_output[i].crtc_info, + &pipes[i]->stream_res.audio_output.crtc_info, &pipes[i]->stream->audio_info, - &audio_output[i].dp_link_info); + &pipes[i]->stream_res.audio_output.dp_link_info); if (link->dc->config.disable_hbr_audio_dp2 && pipes[i]->stream_res.audio->funcs->az_disable_hbr_audio && @@ -138,12 +141,19 @@ static void dp_retrain_link_dp_test(struct dc_link *link, pipes[i]->stream_res.tg->funcs->enable_crtc(pipes[i]->stream_res.tg); // Set DPMS on with stream update - for (i = 0; i < state->stream_count; i++) - if (state->streams[i] && state->streams[i]->link && state->streams[i]->link == link) { - stream_update.stream = state->streams[i]; + // Cache all streams on current link since dc_update_planes_and_stream might kill current_state + for (i = 0; i < MAX_PIPES; i++) { + if (state->streams[i] && state->streams[i]->link && state->streams[i]->link == link) + streams_on_link[num_streams_on_link++] = state->streams[i]; + } + + for (i = 0; i < num_streams_on_link; i++) { + if (streams_on_link[i] && streams_on_link[i]->link && streams_on_link[i]->link == link) { + stream_update.stream = streams_on_link[i]; stream_update.dpms_off = &dpms_off; - 
dc_update_planes_and_stream(state->clk_mgr->ctx->dc, NULL, 0, state->streams[i], &stream_update); + dc_update_planes_and_stream(state->clk_mgr->ctx->dc, NULL, 0, streams_on_link[i], &stream_update); } + } } static void dp_test_send_link_training(struct dc_link *link) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index b68bcc9fca0a..892907991f91 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -138,8 +138,7 @@ void setup_dio_stream_attribute(struct pipe_ctx *pipe_ctx) stream_encoder->funcs->dvi_set_stream_attribute( stream_encoder, &stream->timing, - (stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) ? - true : false); + stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK); else if (dc_is_lvds_signal(stream->signal)) stream_encoder->funcs->lvds_set_stream_attribute( stream_encoder, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 827b630daf49..b717e430051a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -656,7 +656,7 @@ static bool wait_for_entering_dp_alt_mode(struct dc_link *link) return true; is_in_alt_mode = link->link_enc->funcs->is_in_alt_mode(link->link_enc); - DC_LOG_DC("DP Alt mode state on HPD: %d\n", is_in_alt_mode); + DC_LOG_DC("DP Alt mode state on HPD: %d Link=%d\n", is_in_alt_mode, link->link_index); if (is_in_alt_mode) return true; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 8c8682f743d6..08ee8d2f777b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2358,9 +2358,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->sink) { if (pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_VIRTUAL && pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_NONE) { - DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x\n", __func__, + DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x link=%d\n", __func__, pipe_ctx->stream->sink->edid_caps.display_name, - pipe_ctx->stream->signal); + pipe_ctx->stream->signal, link->link_index); } } @@ -2458,7 +2458,6 @@ void link_set_dpms_on( struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc; enum otg_out_mux_dest otg_out_dest = OUT_MUX_DIO; struct vpg *vpg = pipe_ctx->stream_res.stream_enc->vpg; - const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); bool apply_edp_fast_boot_optimization = pipe_ctx->stream->apply_edp_fast_boot_optimization; @@ -2474,9 +2473,10 @@ void link_set_dpms_on( if (pipe_ctx->stream->sink) { if (pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_VIRTUAL && pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_NONE) { - DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x\n", __func__, + DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x link=%d\n", __func__, pipe_ctx->stream->sink->edid_caps.display_name, - pipe_ctx->stream->signal); + pipe_ctx->stream->signal, + link->link_index); } } @@ -2502,8 +2502,6 @@ void link_set_dpms_on( pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, otg_out_dest); } - link_hwss->setup_stream_attribute(pipe_ctx); - pipe_ctx->stream->apply_edp_fast_boot_optimization = false; // Enable VPG before building infoframe diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c 
b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index de1143dbbd25..31a73867cd4c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -165,6 +165,8 @@ static void construct_link_service_dp_capability(struct link_service *link_srv) link_srv->dp_overwrite_extended_receiver_cap = dp_overwrite_extended_receiver_cap; link_srv->dp_decide_lttpr_mode = dp_decide_lttpr_mode; + link_srv->dp_get_lttpr_count = dp_get_lttpr_count; + link_srv->edp_get_alpm_support = edp_get_alpm_support; } /* link dp phy/dpia implements basic dp phy/dpia functionality such as diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index aecaf37eee35..acdc162de535 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -408,8 +408,10 @@ enum dc_status link_validate_dp_tunnel_bandwidth(const struct dc *dc, const stru link = stream->link; if (!(link && (stream->signal == SIGNAL_TYPE_DISPLAY_PORT - || stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) - && link->hpd_status)) + || stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST))) + continue; + + if ((link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) && (link->hpd_status == false)) continue; dp_tunnel_settings = get_dp_tunnel_settings(new_ctx, stream); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 651926e547b9..b12c11bd6a14 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1525,8 +1525,8 @@ bool read_is_mst_supported(struct dc_link *link) return false; } - rev.raw = 0; - cap.raw = 0; + rev.raw = 0; + cap.raw = 0; st = core_link_read_dpcd(link, DP_DPCD_REV, &rev.raw, sizeof(rev)); @@ -2125,13 +2125,13 @@ void detect_edp_sink_caps(struct dc_link *link) &backlight_adj_cap, sizeof(backlight_adj_cap)); link->dpcd_caps.dynamic_backlight_capable_edp = - (backlight_adj_cap & DP_EDP_DYNAMIC_BACKLIGHT_CAP) ? true:false; + (backlight_adj_cap & DP_EDP_DYNAMIC_BACKLIGHT_CAP) ? true : false; core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_1, &general_edp_cap, sizeof(general_edp_cap)); link->dpcd_caps.set_power_state_capable_edp = - (general_edp_cap & DP_EDP_SET_POWER_CAP) ? true:false; + (general_edp_cap & DP_EDP_SET_POWER_CAP) ? 
true : false; set_default_brightness_aux(link); @@ -2195,6 +2195,12 @@ void detect_edp_sink_caps(struct dc_link *link) DP_EDP_MSO_LINK_CAPABILITIES, (uint8_t *)&link->dpcd_caps.mso_cap_sst_links_supported, sizeof(link->dpcd_caps.mso_cap_sst_links_supported)); + /* + * Read eDP general capability 2 + */ + core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_2, + (uint8_t *)&link->dpcd_caps.dp_edp_general_cap_2, + sizeof(link->dpcd_caps.dp_edp_general_cap_2)); } bool dp_get_max_link_enc_cap(const struct dc_link *link, struct dc_link_settings *max_link_enc_cap) @@ -2506,3 +2512,40 @@ bool dp_is_sink_present(struct dc_link *link) return present; } + +uint8_t dp_get_lttpr_count(struct dc_link *link) +{ + if (dp_is_lttpr_present(link)) + return dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + + return 0; +} + +void edp_get_alpm_support(struct dc_link *link, + bool *auxless_support, + bool *auxwake_support) +{ + bool lttpr_present = dp_is_lttpr_present(link); + + if (auxless_support == NULL || auxwake_support == NULL) + return; + + *auxless_support = false; + *auxwake_support = false; + + if (!dc_is_embedded_signal(link->connector_signal)) + return; + + if (link->dpcd_caps.alpm_caps.bits.AUX_LESS_ALPM_CAP) { + if (lttpr_present) { + if (link->dpcd_caps.lttpr_caps.alpm.bits.AUX_LESS_ALPM_SUPPORTED) + *auxless_support = true; + } else + *auxless_support = true; + } + + if (link->dpcd_caps.alpm_caps.bits.AUX_WAKE_ALPM_CAP) { + if (!lttpr_present) + *auxwake_support = true; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h index 940b147cc5d4..7170db5a1c13 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h @@ -108,4 +108,10 @@ uint32_t link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw); bool dp_overwrite_extended_receiver_cap(struct dc_link *link); +uint8_t dp_get_lttpr_count(struct dc_link *link); + +void edp_get_alpm_support(struct dc_link *link, + bool *auxless_support, + bool *auxwake_support); + #endif /* __DC_LINK_DP_CAPABILITY_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index 819bf2d8ba53..8a3c18ae97a7 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -48,8 +48,7 @@ */ static bool link_dp_is_bw_alloc_available(struct dc_link *link) { - return (link && link->hpd_status - && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling + return (link && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support); } @@ -226,35 +225,40 @@ bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link) bool ret = false; uint8_t val; - if (link->hpd_status) { - val = DPTX_BW_ALLOC_MODE_ENABLE | DPTX_BW_ALLOC_UNMASK_IRQ; + if (link->dc->debug.dpia_debug.bits.enable_bw_allocation_mode == false) { + DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode disabled", __func__, link->link_index); + return false; + } - if (core_link_write_dpcd(link, DPTX_BW_ALLOCATION_MODE_CONTROL, &val, sizeof(uint8_t)) == DC_OK) { - DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode enabled", __func__, link->link_index); + val = 
DPTX_BW_ALLOC_MODE_ENABLE | DPTX_BW_ALLOC_UNMASK_IRQ; - retrieve_usb4_dp_bw_allocation_info(link); + if (core_link_write_dpcd(link, DPTX_BW_ALLOCATION_MODE_CONTROL, &val, sizeof(uint8_t)) == DC_OK) { + DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode enabled", __func__, link->link_index); - if (link->dpia_bw_alloc_config.nrd_max_link_rate && link->dpia_bw_alloc_config.nrd_max_lane_count) { - link->reported_link_cap.link_rate = link->dpia_bw_alloc_config.nrd_max_link_rate; - link->reported_link_cap.lane_count = link->dpia_bw_alloc_config.nrd_max_lane_count; - } + retrieve_usb4_dp_bw_allocation_info(link); - link->dpia_bw_alloc_config.bw_alloc_enabled = true; - ret = true; - - if (link->dc->debug.dpia_debug.bits.enable_usb4_bw_zero_alloc_patch) { - /* - * During DP tunnel creation, the CM preallocates BW - * and reduces the estimated BW of other DPIAs. - * The CM releases the preallocation only when the allocation is complete. - * Perform a zero allocation to make the CM release the preallocation - * and correctly update the estimated BW for all DPIAs per host router. - */ - link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, 0); - } - } else - DC_LOG_DEBUG("%s: link[%d] failed to enable DPTX BW allocation mode", __func__, link->link_index); - } + if ( + link->dpia_bw_alloc_config.nrd_max_link_rate + && link->dpia_bw_alloc_config.nrd_max_lane_count) { + link->reported_link_cap.link_rate = link->dpia_bw_alloc_config.nrd_max_link_rate; + link->reported_link_cap.lane_count = link->dpia_bw_alloc_config.nrd_max_lane_count; + } + + link->dpia_bw_alloc_config.bw_alloc_enabled = true; + ret = true; + + if (link->dc->debug.dpia_debug.bits.enable_usb4_bw_zero_alloc_patch) { + /* + * During DP tunnel creation, the CM preallocates BW + * and reduces the estimated BW of other DPIAs. + * The CM releases the preallocation only when the allocation is complete. + * Perform a zero allocation to make the CM release the preallocation + * and correctly update the estimated BW for all DPIAs per host router. + */ + link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, 0); + } + } else + DC_LOG_DEBUG("%s: link[%d] failed to enable DPTX BW allocation mode", __func__, link->link_index); return ret; } @@ -297,15 +301,12 @@ void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pe { if (link && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling && link->dpia_bw_alloc_config.bw_alloc_enabled) { - //1. Hot Plug - if (link->hpd_status && peak_bw > 0) { + if (peak_bw > 0) { // If DP over USB4 then we need to check BW allocation link->dpia_bw_alloc_config.link_max_bw = peak_bw; link_dpia_send_bw_alloc_request(link, peak_bw); - } - //2. Cold Unplug - else if (!link->hpd_status) + } else dpia_bw_alloc_unplug(link); } } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 2dc1a660e504..134093ce5a8e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -1018,7 +1018,12 @@ static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, e { enum dc_status status; uint8_t sink_status = 0; - uint8_t i; + uint32_t i; + uint8_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t intra_hop_disable_time_ms = (lttpr_count > 0 ? 
lttpr_count * 300 : 10); + + // Each hop could theoretically take over 256ms (max 128b/132b AUX RD INTERVAL) + // To be safe, allow 300ms per LTTPR and 10ms for no LTTPR case /* clear training pattern set */ status = dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); @@ -1028,7 +1033,7 @@ static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, e if (encoding == DP_128b_132b_ENCODING) { /* poll for intra-hop disable */ - for (i = 0; i < 10; i++) { + for (i = 0; i < intra_hop_disable_time_ms; i++) { if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) && (sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0) break; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e7927b8f5ba3..8b7b87b21c2e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -870,6 +870,8 @@ bool edp_setup_psr(struct dc_link *link, psr_context->dsc_slice_height = psr_config->dsc_slice_height; + psr_context->os_request_force_ffu = psr_config->os_request_force_ffu; + if (psr) { link->psr_settings.psr_feature_enabled = psr->funcs->psr_copy_settings(psr, link, psr_context, panel_inst); @@ -944,7 +946,7 @@ bool edp_set_replay_allow_active(struct dc_link *link, const bool *allow_active, // TODO: Handle mux change case if force_static is set // If force_static is set, just change the replay_allow_active state directly if (replay != NULL && link->replay_settings.replay_feature_enabled) - replay->funcs->replay_enable(replay, *allow_active, wait, panel_inst, link); + replay->funcs->replay_enable(replay, *allow_active, wait, panel_inst); link->replay_settings.replay_allow_active = *allow_active; } @@ -1029,6 +1031,8 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream replay_context.line_time_in_ns = lineTimeInNs; + replay_context.os_request_force_ffu = link->replay_settings.config.os_request_force_ffu; + link->replay_settings.replay_feature_enabled = replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst); if (link->replay_settings.replay_feature_enabled) { @@ -1042,7 +1046,13 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream (uint8_t *)&(replay_config.raw), sizeof(uint8_t)); memset(&alpm_config, 0, sizeof(alpm_config)); - alpm_config.bits.ENABLE = 1; + alpm_config.bits.ENABLE = link->replay_settings.config.alpm_mode != DC_ALPM_UNSUPPORTED ? 
1 : 0; + + if (link->replay_settings.config.alpm_mode == DC_ALPM_AUXLESS) { + alpm_config.bits.ALPM_MODE_SEL = 1; + alpm_config.bits.ACDS_PERIOD_DURATION = 0; + } + dm_helpers_dp_write_dpcd( link->ctx, link, diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c index 259a98e4ee2c..2a422e223bf2 100644 --- a/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c @@ -284,7 +284,7 @@ void mcifwb2_dump_frame(struct mcif_wb *mcif_wb, REG_UPDATE(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_LOCK, 0xf); - memcpy(dest_luma_buffer, luma_buffer, mcif_params->luma_pitch * dest_height); + memcpy(dest_luma_buffer, luma_buffer, (size_t)mcif_params->luma_pitch * dest_height); memcpy(dest_chroma_buffer, chroma_buffer, mcif_params->chroma_pitch * dest_height / 2); REG_UPDATE(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_LOCK, 0x0); diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c index f3fb3fe13757..e1a0308dee57 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c @@ -287,13 +287,6 @@ void mpc401_program_lut_read_write_control(struct mpc *mpc, const enum MCM_LUT_I } } -void mpc401_program_3dlut_size(struct mpc *mpc, bool is_17x17x17, int mpcc_id) -{ - struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); - - REG_UPDATE(MPCC_MCM_3DLUT_MODE[mpcc_id], MPCC_MCM_3DLUT_SIZE, is_17x17x17 ? 0 : 1); -} - void mpc_program_gamut_remap( struct mpc *mpc, unsigned int mpcc_id, @@ -611,7 +604,6 @@ static const struct mpc_funcs dcn401_mpc_funcs = { .populate_lut = mpc401_populate_lut, .program_lut_read_write_control = mpc401_program_lut_read_write_control, .program_lut_mode = mpc401_program_lut_mode, - .program_3dlut_size = mpc401_program_3dlut_size, }; diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h index eb0c68d0b0c7..fdc42f8ab3ff 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h @@ -221,11 +221,6 @@ void mpc401_program_lut_read_write_control( bool lut_bank_a, int mpcc_id); -void mpc401_program_3dlut_size( - struct mpc *mpc, - bool is_17x17x17, - int mpcc_id); - void mpc401_set_gamut_remap( struct mpc *mpc, int mpcc_id, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h index d159e3ed3bb3..ead92ad78a23 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h @@ -62,6 +62,7 @@ SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, mask_sh),\ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index 72bff94cb57d..52d5ea98c86b 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -162,6 +162,8 @@ static bool 
optc35_disable_crtc(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + REG_WAIT(OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, 0, 1, 100000); + optc1_clear_optc_underflow(optc); return true; @@ -428,6 +430,21 @@ static void optc35_set_long_vtotal( } } +static void optc35_wait_otg_disable(struct timing_generator *optc) +{ + struct optc *optc1; + uint32_t is_master_en; + + if (!optc || !optc->ctx) + return; + + optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OTG_CONTROL, OTG_MASTER_EN, &is_master_en); + if (!is_master_en) + REG_WAIT(OTG_CLOCK_CONTROL, OTG_CURRENT_MASTER_EN_STATE, 0, 1, 100000); +} + static const struct timing_generator_funcs dcn35_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, @@ -479,6 +496,7 @@ static const struct timing_generator_funcs dcn35_tg_funcs = { .set_odm_bypass = optc32_set_odm_bypass, .set_odm_combine = optc35_set_odm_combine, .get_optc_source = optc2_get_optc_source, + .wait_otg_disable = optc35_wait_otg_disable, .set_h_timing_div_manual_mode = optc32_set_h_timing_div_manual_mode, .set_out_mux = optc3_set_out_mux, .set_drr_trigger_window = optc3_set_drr_trigger_window, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index ff79c38287df..5af13706e601 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -226,6 +226,11 @@ bool optc401_disable_crtc(struct timing_generator *optc) REG_UPDATE(CONTROL, VTG0_ENABLE, 0); + // wait until CRTC_CURRENT_MASTER_EN_STATE == 0 + REG_WAIT(OTG_CONTROL, + OTG_CURRENT_MASTER_EN_STATE, + 0, 10, 15000); + /* CRTC disabled, so disable clock. */ REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, diff --git a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c index af21c0a27f86..72bd43f9bbe2 100644 --- a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c @@ -79,16 +79,12 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 
0 : 2; uint32_t org_ip_request_cntl = 0; - bool block_enabled; - - /*need to enable dscclk regardless DSC_PG*/ - if (pg_cntl->ctx->dc->res_pool->dccg->funcs->enable_dsc && power_on) - pg_cntl->ctx->dc->res_pool->dccg->funcs->enable_dsc( - pg_cntl->ctx->dc->res_pool->dccg, dsc_inst); + bool block_enabled = false; + bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg || + pg_cntl->ctx->dc->debug.disable_dsc_power_gate || + pg_cntl->ctx->dc->idle_optimizations_allowed; - if (pg_cntl->ctx->dc->debug.ignore_pg || - pg_cntl->ctx->dc->debug.disable_dsc_power_gate || - pg_cntl->ctx->dc->idle_optimizations_allowed) + if (skip_pg && !power_on) return; block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, dsc_inst); @@ -111,7 +107,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 1: /* DSC1 */ REG_UPDATE(DOMAIN17_PG_CONFIG, @@ -119,7 +115,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 2: /* DSC2 */ REG_UPDATE(DOMAIN18_PG_CONFIG, @@ -127,7 +123,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 3: /* DSC3 */ REG_UPDATE(DOMAIN19_PG_CONFIG, @@ -135,7 +131,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; default: BREAK_TO_DEBUGGER(); @@ -144,12 +140,6 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo if (dsc_inst < MAX_PIPES) pg_cntl->pg_pipe_res_enable[PG_DSC][dsc_inst] = power_on; - - if (pg_cntl->ctx->dc->res_pool->dccg->funcs->disable_dsc && !power_on) { - /*this is to disable dscclk*/ - pg_cntl->ctx->dc->res_pool->dccg->funcs->disable_dsc( - pg_cntl->ctx->dc->res_pool->dccg, dsc_inst); - } } static bool pg_cntl35_hubp_dpp_pg_status(struct pg_cntl *pg_cntl, unsigned int hubp_dpp_inst) @@ -189,11 +179,12 @@ void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dp uint32_t pwr_status = power_on ? 
0 : 2; uint32_t org_ip_request_cntl; bool block_enabled; + bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg || + pg_cntl->ctx->dc->debug.disable_hubp_power_gate || + pg_cntl->ctx->dc->debug.disable_dpp_power_gate || + pg_cntl->ctx->dc->idle_optimizations_allowed; - if (pg_cntl->ctx->dc->debug.ignore_pg || - pg_cntl->ctx->dc->debug.disable_hubp_power_gate || - pg_cntl->ctx->dc->debug.disable_dpp_power_gate || - pg_cntl->ctx->dc->idle_optimizations_allowed) + if (skip_pg && !power_on) return; block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, hubp_dpp_inst); @@ -213,22 +204,22 @@ void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dp case 0: /* DPP0 & HUBP0 */ REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 1: /* DPP1 & HUBP1 */ REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 2: /* DPP2 & HUBP2 */ REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 3: /* DPP3 & HUBP3 */ REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; default: BREAK_TO_DEBUGGER(); @@ -501,6 +492,36 @@ void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl) pg_cntl->pg_res_enable[PG_DWB] = block_enabled; } +static void pg_cntl35_print_pg_status(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log) +{ + int i = 0; + bool block_enabled = false; + + DC_LOG_DEBUG("%s: %s", debug_func, debug_log); + + DC_LOG_DEBUG("PG_CNTL status:\n"); + + block_enabled = pg_cntl35_io_clk_status(pg_cntl); + DC_LOG_DEBUG("ONO0=%d (DCCG, DIO, DCIO)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_mem_status(pg_cntl); + DC_LOG_DEBUG("ONO1=%d (DCHUBBUB, DCHVM, DCHUBBUBMEM)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_plane_otg_status(pg_cntl); + DC_LOG_DEBUG("ONO2=%d (MPC, OPP, OPTC, DWB)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_hpo_pg_status(pg_cntl); + DC_LOG_DEBUG("ONO3=%d (HPO)\n", block_enabled ? 1 : 0); + + for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) { + block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, i); + DC_LOG_DEBUG("ONO%d=%d (DCHUBP%d, DPP%d)\n", 4 + i * 2, block_enabled ? 1 : 0, i, i); + + block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, i); + DC_LOG_DEBUG("ONO%d=%d (DSC%d)\n", 5 + i * 2, block_enabled ? 
1 : 0, i); + } +} + static const struct pg_cntl_funcs pg_cntl35_funcs = { .init_pg_status = pg_cntl35_init_pg_status, .dsc_pg_control = pg_cntl35_dsc_pg_control, @@ -511,7 +532,8 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = { .mpcc_pg_control = pg_cntl35_mpcc_pg_control, .opp_pg_control = pg_cntl35_opp_pg_control, .optc_pg_control = pg_cntl35_optc_pg_control, - .dwb_pg_control = pg_cntl35_dwb_pg_control + .dwb_pg_control = pg_cntl35_dwb_pg_control, + .print_pg_status = pg_cntl35_print_pg_status }; struct pg_cntl *pg_cntl35_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 164ba796f64c..869a8e515fc0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -1111,12 +1111,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) &clks); dc->bw_vbios->low_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[0] * memory_type_multiplier, 1000); + (int64_t)clks.clocks_in_khz[0] * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier, + (int64_t)clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier, + (int64_t)clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier, 1000); return; @@ -1152,12 +1152,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) * YCLK = UMACLK*m_memoryTypeMultiplier */ dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); + (int64_t)mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, + (int64_t)mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, + (int64_t)mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index eb1e158d3436..2f23cc6df571 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -990,12 +990,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) memory_type_multiplier = MEMORY_TYPE_HBM; dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); + (int64_t)mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, + (int64_t)mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, + (int64_t)mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 895349d9ca07..201ed863b69e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -2192,7 +2192,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params j = 0; // create the final dcfclk and uclk table while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 3ed7f50554e2..ca17e5d8fdc2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -2239,8 +2239,7 @@ struct resource_pool *dcn31_create_resource_pool( enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link, struct dc_link_settings *link_setting, uint8_t pipe_count, - struct pipe_ctx *pipes, - struct audio_output *audio_output) + struct pipe_ctx *pipes) { struct dc_state *state = link->dc->current_state; int i; @@ -2255,7 +2254,7 @@ enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link, // Setup audio if (pipes[i].stream_res.audio != NULL) - build_audio_output(state, &pipes[i], &audio_output[i]); + build_audio_output(state, &pipes[i], &pipes[i].stream_res.audio_output); } #else /* This DCN requires rate divider updates and audio reprogramming to allow DP1<-->DP2 link rate switching, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h index c32c85ef0ba4..7e8fde65528f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h @@ -69,8 +69,7 @@ unsigned int dcn31_get_det_buffer_size( enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link, struct dc_link_settings *link_setting, uint8_t pipe_count, - struct pipe_ctx *pipes, - struct audio_output *audio_output); + struct pipe_ctx *pipes); /*temp: B0 specific before switch to dcn313 headers*/ #ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 82f966cf4ed2..20d714596021 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -1141,7 +1141,8 @@ unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, SRI_ARR(DCN_SURF1_TTU_CNTL1, HUBPREQ, id), \ SRI_ARR(DCN_CUR0_TTU_CNTL0, HUBPREQ, id), \ SRI_ARR(DCN_CUR0_TTU_CNTL1, HUBPREQ, id), \ - SRI_ARR(HUBP_CLK_CNTL, HUBP, id) + SRI_ARR(HUBP_CLK_CNTL, HUBP, id), \ + SRI_ARR(HUBPRET_READ_LINE_VALUE, HUBPRET, id) #define HUBP_REG_LIST_DCN2_COMMON_RI(id) \ HUBP_REG_LIST_DCN_RI(id), HUBP_REG_LIST_DCN_VM_RI(id), \ SRI_ARR(PREFETCH_SETTINGS, HUBPREQ, id), \ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index b3988e38d0a6..068c123ea8a8 100644 --- 
a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -708,6 +708,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .use_max_lb = true, .force_disable_subvp = false, + .disable_force_pstate_allow_on_hw_release = false, .exit_idle_opt_for_cursor_updates = true, .using_dml2 = true, .using_dml21 = true, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 2ae6831c31ef..0fc66487d800 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -140,7 +140,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ - SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id), \ + SRI_ARR(HUBPRET_READ_LINE_VALUE, HUBPRET, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile new file mode 100644 index 000000000000..bc93356a0b5b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: MIT +# +# Copyright 2025 Advanced Micro Devices, Inc. +# Makefile for bounding box component. +# Floating point required due to nature of bounding box values + +soc_and_ip_translator_ccflags := $(CC_FLAGS_FPU) +soc_and_ip_translator_rcflags := $(CC_FLAGS_NO_FPU) + +CFLAGS_$(AMDDALPATH)/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.o := $(soc_and_ip_translator_ccflags) + +CFLAGS_REMOVE_$(AMDDALPATH)/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.o := $(soc_and_ip_translator_rcflags) + +soc_and_ip_translator := soc_and_ip_translator.o +soc_and_ip_translator += dcn401/dcn401_soc_and_ip_translator.o + +AMD_DAL_soc_and_ip_translator := $(addprefix $(AMDDALPATH)/dc/soc_and_ip_translator/, $(soc_and_ip_translator)) + +AMD_DISPLAY_FILES += $(AMD_DAL_soc_and_ip_translator) diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c new file mode 100644 index 000000000000..3190c76eb482 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#include "dcn401_soc_and_ip_translator.h" +#include "bounding_boxes/dcn4_soc_bb.h" + +/* soc_and_ip_translator component used to get up-to-date values for bounding box. + * Bounding box values are stored in several locations and locations can vary with DCN revision. + * This component provides an interface to get DCN-specific bounding box values. + */ + +static void get_default_soc_bb(struct dml2_soc_bb *soc_bb) +{ + memcpy(soc_bb, &dml2_socbb_dcn401, sizeof(struct dml2_soc_bb)); + memcpy(&soc_bb->qos_parameters, &dml_dcn4_variant_a_soc_qos_params, sizeof(struct dml2_soc_qos_parameters)); +} + +/* + * DC clock table is obtained from SMU during runtime. + * SMU stands for System Management Unit. It is a power management processor. 
+ * It owns the initialization of dc's clock table and programming of clock values + * based on dc's requests. + * Our clock values in base soc bb is a dummy placeholder. The real clock values + * are retrieved from SMU firmware to dc clock table at runtime. + * This function overrides our dummy placeholder values with real values in dc + * clock table. + */ +static void dcn401_convert_dc_clock_table_to_soc_bb_clock_table( + struct dml2_soc_state_table *dml_clk_table, + const struct clk_bw_params *dc_bw_params, + bool use_clock_dc_limits) +{ + int i; + const struct clk_limit_table *dc_clk_table; + + if (dc_bw_params == NULL) + /* skip if bw params could not be obtained from smu */ + return; + + dc_clk_table = &dc_bw_params->clk_table; + + /* dcfclk */ + if (dc_clk_table->num_entries_per_clk.num_dcfclk_levels) { + dml_clk_table->dcfclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dcfclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->dcfclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dcfclk_mhz && + dc_clk_table->entries[i].dcfclk_mhz > dc_bw_params->dc_mode_limit.dcfclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dcfclk_mhz < dc_bw_params->dc_mode_limit.dcfclk_mhz) { + dml_clk_table->dcfclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dcfclk_mhz * 1000; + dml_clk_table->dcfclk.num_clk_values = i + 1; + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = 0; + dml_clk_table->dcfclk.num_clk_values = i; + } + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = dc_clk_table->entries[i].dcfclk_mhz * 1000; + } + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = 0; + } + } + } + + /* fclk */ + if (dc_clk_table->num_entries_per_clk.num_fclk_levels) { + dml_clk_table->fclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_fclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->fclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.fclk_mhz && + dc_clk_table->entries[i].fclk_mhz > dc_bw_params->dc_mode_limit.fclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].fclk_mhz < dc_bw_params->dc_mode_limit.fclk_mhz) { + dml_clk_table->fclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.fclk_mhz * 1000; + dml_clk_table->fclk.num_clk_values = i + 1; + } else { + dml_clk_table->fclk.clk_values_khz[i] = 0; + dml_clk_table->fclk.num_clk_values = i; + } + } else { + dml_clk_table->fclk.clk_values_khz[i] = dc_clk_table->entries[i].fclk_mhz * 1000; + } + } else { + dml_clk_table->fclk.clk_values_khz[i] = 0; + } + } + } + + /* uclk */ + if (dc_clk_table->num_entries_per_clk.num_memclk_levels) { + dml_clk_table->uclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_memclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->uclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.memclk_mhz && + dc_clk_table->entries[i].memclk_mhz > dc_bw_params->dc_mode_limit.memclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].memclk_mhz < dc_bw_params->dc_mode_limit.memclk_mhz) { + dml_clk_table->uclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.memclk_mhz * 1000; + dml_clk_table->uclk.num_clk_values = i + 1; + } else { + dml_clk_table->uclk.clk_values_khz[i] = 0; + dml_clk_table->uclk.num_clk_values = i; + } + } else { + dml_clk_table->uclk.clk_values_khz[i] = dc_clk_table->entries[i].memclk_mhz * 1000; + } + } else { + 
dml_clk_table->uclk.clk_values_khz[i] = 0; + } + } + } + + /* dispclk */ + if (dc_clk_table->num_entries_per_clk.num_dispclk_levels) { + dml_clk_table->dispclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dispclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->dispclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dispclk_mhz && + dc_clk_table->entries[i].dispclk_mhz > dc_bw_params->dc_mode_limit.dispclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dispclk_mhz < dc_bw_params->dc_mode_limit.dispclk_mhz) { + dml_clk_table->dispclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dispclk_mhz * 1000; + dml_clk_table->dispclk.num_clk_values = i + 1; + } else { + dml_clk_table->dispclk.clk_values_khz[i] = 0; + dml_clk_table->dispclk.num_clk_values = i; + } + } else { + dml_clk_table->dispclk.clk_values_khz[i] = dc_clk_table->entries[i].dispclk_mhz * 1000; + } + } else { + dml_clk_table->dispclk.clk_values_khz[i] = 0; + } + } + } + + /* dppclk */ + if (dc_clk_table->num_entries_per_clk.num_dppclk_levels) { + dml_clk_table->dppclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dppclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->dppclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dppclk_mhz && + dc_clk_table->entries[i].dppclk_mhz > dc_bw_params->dc_mode_limit.dppclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dppclk_mhz < dc_bw_params->dc_mode_limit.dppclk_mhz) { + dml_clk_table->dppclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dppclk_mhz * 1000; + dml_clk_table->dppclk.num_clk_values = i + 1; + } else { + dml_clk_table->dppclk.clk_values_khz[i] = 0; + dml_clk_table->dppclk.num_clk_values = i; + } + } else { + dml_clk_table->dppclk.clk_values_khz[i] = dc_clk_table->entries[i].dppclk_mhz * 1000; + } + } else { + dml_clk_table->dppclk.clk_values_khz[i] = 0; + } + } + } + + /* dtbclk */ + if (dc_clk_table->num_entries_per_clk.num_dtbclk_levels) { + dml_clk_table->dtbclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dtbclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->dtbclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dtbclk_mhz && + dc_clk_table->entries[i].dtbclk_mhz > dc_bw_params->dc_mode_limit.dtbclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dtbclk_mhz < dc_bw_params->dc_mode_limit.dtbclk_mhz) { + dml_clk_table->dtbclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dtbclk_mhz * 1000; + dml_clk_table->dtbclk.num_clk_values = i + 1; + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = 0; + dml_clk_table->dtbclk.num_clk_values = i; + } + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = dc_clk_table->entries[i].dtbclk_mhz * 1000; + } + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = 0; + } + } + } + + /* socclk */ + if (dc_clk_table->num_entries_per_clk.num_socclk_levels) { + dml_clk_table->socclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_socclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->socclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.socclk_mhz && + dc_clk_table->entries[i].socclk_mhz > dc_bw_params->dc_mode_limit.socclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].socclk_mhz < dc_bw_params->dc_mode_limit.socclk_mhz) { + dml_clk_table->socclk.clk_values_khz[i] = 
dc_bw_params->dc_mode_limit.socclk_mhz * 1000; + dml_clk_table->socclk.num_clk_values = i + 1; + } else { + dml_clk_table->socclk.clk_values_khz[i] = 0; + dml_clk_table->socclk.num_clk_values = i; + } + } else { + dml_clk_table->socclk.clk_values_khz[i] = dc_clk_table->entries[i].socclk_mhz * 1000; + } + } else { + dml_clk_table->socclk.clk_values_khz[i] = 0; + } + } + } + + /* dram config */ + dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels; + dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes; +} + +void dcn401_update_soc_bb_with_values_from_clk_mgr(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config) +{ + soc_bb->dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000; + soc_bb->dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + soc_bb->mall_allocated_for_dcn_mbytes = dc->caps.mall_size_total / (1024 * 1024); + + if (dc->clk_mgr->funcs->is_smu_present && + dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr)) { + dcn401_convert_dc_clock_table_to_soc_bb_clock_table(&soc_bb->clk_table, + dc->clk_mgr->bw_params, + config->use_clock_dc_limits); + } +} + +void dcn401_update_soc_bb_with_values_from_vbios(struct dml2_soc_bb *soc_bb, const struct dc *dc) +{ + soc_bb->dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; + soc_bb->xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000; + + /* latencies in vbios are platform specific and should be used if provided */ + if (dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns) + soc_bb->power_management_parameters.dram_clk_change_blackout_us = + dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns / 10.0; + + if (dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns) + soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = + dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns / 10.0; + + if (dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns) + soc_bb->power_management_parameters.stutter_exit_latency_us = + dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns / 10.0; +} + +void dcn401_update_soc_bb_with_values_from_software_policy(struct dml2_soc_bb *soc_bb, const struct dc *dc) +{ + /* set if the value is provided */ + if (dc->bb_overrides.sr_exit_time_ns) + soc_bb->power_management_parameters.stutter_exit_latency_us = + dc->bb_overrides.sr_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_time_ns) + soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + + if (dc->bb_overrides.dram_clock_change_latency_ns) + soc_bb->power_management_parameters.dram_clk_change_blackout_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + + if (dc->bb_overrides.fclk_clock_change_latency_ns) + soc_bb->power_management_parameters.fclk_change_blackout_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000.0; + + //Z8 values not expected nor used on DCN401 but still added for completeness + if (dc->bb_overrides.sr_exit_z8_time_ns) + soc_bb->power_management_parameters.z8_stutter_exit_latency_us = + dc->bb_overrides.sr_exit_z8_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns) + soc_bb->power_management_parameters.z8_stutter_enter_plus_exit_latency_us = + dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0; +} + +static void apply_soc_bb_updates(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config) 
+{ + /* Individual modification can be overwritten even if it was obtained by a previous function. + * Modifications are acquired in order of priority (lowest to highest). + */ + dc_assert_fp_enabled(); + + dcn401_update_soc_bb_with_values_from_clk_mgr(soc_bb, dc, config); + dcn401_update_soc_bb_with_values_from_vbios(soc_bb, dc); + dcn401_update_soc_bb_with_values_from_software_policy(soc_bb, dc); +} + +void dcn401_get_soc_bb(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config) +{ + //get default soc_bb with static values + get_default_soc_bb(soc_bb); + //update soc_bb values with more accurate values + apply_soc_bb_updates(soc_bb, dc, config); +} + +static void dcn401_get_ip_caps(struct dml2_ip_capabilities *ip_caps) +{ + *ip_caps = dml2_dcn401_max_ip_caps; +} + +static struct soc_and_ip_translator_funcs dcn401_translator_funcs = { + .get_soc_bb = dcn401_get_soc_bb, + .get_ip_caps = dcn401_get_ip_caps, +}; + +void dcn401_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator) +{ + soc_and_ip_translator->translator_funcs = &dcn401_translator_funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h new file mode 100644 index 000000000000..21d842857601 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#ifndef _DCN401_SOC_AND_IP_TRANSLATOR_H_ +#define _DCN401_SOC_AND_IP_TRANSLATOR_H_ + +#include "core_types.h" +#include "dc.h" +#include "clk_mgr.h" +#include "soc_and_ip_translator.h" +#include "dml2/dml21/inc/dml_top_soc_parameter_types.h" + +void dcn401_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator); + +/* Functions that can be re-used by higher DCN revisions of this component */ +void dcn401_get_soc_bb(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config); +void dcn401_update_soc_bb_with_values_from_clk_mgr(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config); +void dcn401_update_soc_bb_with_values_from_vbios(struct dml2_soc_bb *soc_bb, const struct dc *dc); +void dcn401_update_soc_bb_with_values_from_software_policy(struct dml2_soc_bb *soc_bb, const struct dc *dc); + +#endif /* _DCN401_SOC_AND_IP_TRANSLATOR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c new file mode 100644 index 000000000000..c9e224d262c9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#include "dcn42_soc_and_ip_translator.h" +#include "soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h" +#include "bounding_boxes/dcn42_soc_bb.h" + +/* soc_and_ip_translator component used to get up-to-date values for bounding box. + * Bounding box values are stored in several locations and locations can vary with DCN revision. + * This component provides an interface to get DCN-specific bounding box values. 
+ */ + +static void dcn42_get_ip_caps(struct dml2_ip_capabilities *ip_caps) +{ + *ip_caps = dml2_dcn42_max_ip_caps; +} + +static struct soc_and_ip_translator_funcs dcn42_translator_funcs = { + .get_soc_bb = dcn401_get_soc_bb, + .get_ip_caps = dcn42_get_ip_caps, +}; + +void dcn42_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator) +{ + soc_and_ip_translator->translator_funcs = &dcn42_translator_funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h new file mode 100644 index 000000000000..914dcbb369a7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#ifndef _DCN42_SOC_AND_IP_TRANSLATOR_H_ +#define _DCN42_SOC_AND_IP_TRANSLATOR_H_ + +#include "core_types.h" +#include "dc.h" +#include "clk_mgr.h" +#include "dml_top_soc_parameter_types.h" +#include "soc_and_ip_translator.h" + +void dcn42_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator); + +#endif /* _DCN42_SOC_AND_IP_TRANSLATOR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c new file mode 100644 index 000000000000..0fc0e5a6c171 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#include "soc_and_ip_translator.h" +#include "soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h" + +static void dc_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator, + enum dce_version dc_version) +{ + switch (dc_version) { + case DCN_VERSION_4_01: + dcn401_construct_soc_and_ip_translator(soc_and_ip_translator); + break; + default: + break; + } +} + +struct soc_and_ip_translator *dc_create_soc_and_ip_translator(enum dce_version dc_version) +{ + struct soc_and_ip_translator *soc_and_ip_translator; + + soc_and_ip_translator = kzalloc(sizeof(*soc_and_ip_translator), GFP_KERNEL); + if (!soc_and_ip_translator) + return NULL; + + dc_construct_soc_and_ip_translator(soc_and_ip_translator, dc_version); + + return soc_and_ip_translator; +} + +void dc_destroy_soc_and_ip_translator(struct soc_and_ip_translator **soc_and_ip_translator) +{ + kfree(*soc_and_ip_translator); + *soc_and_ip_translator = NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c index ad088d70e189..6ffc74fc9dcd 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c @@ -44,6 +44,11 @@ static void virtual_stream_encoder_dvi_set_stream_attribute( struct dc_crtc_timing *crtc_timing, bool is_dual_link) {} +static void virtual_stream_encoder_lvds_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing) +{} + static void virtual_stream_encoder_set_throttled_vcp_size( struct stream_encoder *enc, struct fixed31_32 avg_time_slots_per_mtp) @@ -115,6 +120,8 @@ static const struct stream_encoder_funcs virtual_str_enc_funcs = { virtual_stream_encoder_hdmi_set_stream_attribute, .dvi_set_stream_attribute = 
virtual_stream_encoder_dvi_set_stream_attribute, + .lvds_set_stream_attribute = + virtual_stream_encoder_lvds_set_stream_attribute, .set_throttled_vcp_size = virtual_stream_encoder_set_throttled_vcp_size, .update_hdmi_info_packets = diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 0bafb6710761..338fdc651f2c 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -316,6 +316,7 @@ struct dmub_srv_hw_params { bool disable_sldo_opt; bool enable_non_transparent_setconfig; bool lower_hbr3_phy_ssc; + bool override_hbr3_pll_vco; }; /** @@ -567,6 +568,7 @@ struct dmub_srv { bool sw_init; bool hw_init; + bool dpia_supported; uint64_t fb_base; uint64_t fb_offset; @@ -597,6 +599,8 @@ struct dmub_notification { enum dmub_notification_type type; uint8_t link_index; uint8_t result; + /* notify instance from DMUB */ + uint8_t instance; bool pending_notification; union { struct aux_reply_data aux_reply; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c587b3441e07..e65747f7f12f 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -843,7 +843,8 @@ union dmub_fw_boot_options { uint32_t ips_sequential_ono: 1; /**< 1 to enable sequential ONO IPS sequence */ uint32_t disable_sldo_opt: 1; /**< 1 to disable SLDO optimizations */ uint32_t lower_hbr3_phy_ssc: 1; /**< 1 to lower hbr3 phy ssc to 0.125 percent */ - uint32_t reserved : 6; /**< reserved */ + uint32_t override_hbr3_pll_vco: 1; /**< 1 to override the hbr3 pll vco to 0 */ + uint32_t reserved : 5; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; @@ -882,7 +883,7 @@ enum dmub_shared_state_feature_id { /** * struct dmub_shared_state_ips_fw - Firmware signals for IPS. */ - union dmub_shared_state_ips_fw_signals { +union dmub_shared_state_ips_fw_signals { struct { uint32_t ips1_commit : 1; /**< 1 if in IPS1 or IPS0 RCG */ uint32_t ips2_commit : 1; /**< 1 if in IPS2 */ @@ -897,7 +898,7 @@ enum dmub_shared_state_feature_id { /** * struct dmub_shared_state_ips_signals - Firmware signals for IPS. 
*/ - union dmub_shared_state_ips_driver_signals { +union dmub_shared_state_ips_driver_signals { struct { uint32_t allow_pg : 1; /**< 1 if PG is allowed */ uint32_t allow_ips1 : 1; /**< 1 is IPS1 is allowed */ @@ -1990,18 +1991,19 @@ struct dmub_cmd_lsdma_data { struct lsdma_tiled_copy_data { uint32_t src_addr_lo; uint32_t src_addr_hi; + uint32_t dst_addr_lo; uint32_t dst_addr_hi; uint32_t src_x : 16; uint32_t src_y : 16; - uint32_t src_width : 16; - uint32_t src_height : 16; - uint32_t dst_x : 16; uint32_t dst_y : 16; + uint32_t src_width : 16; + uint32_t src_height : 16; + uint32_t dst_width : 16; uint32_t dst_height : 16; @@ -2034,23 +2036,58 @@ struct dmub_cmd_lsdma_data { uint32_t padding : 30; } tiled_copy_data; struct lsdma_linear_copy_data { + uint32_t src_lo; + uint32_t src_hi; + + uint32_t dst_lo; + uint32_t dst_hi; + uint32_t count : 30; uint32_t cache_policy_dst : 2; uint32_t tmz : 1; uint32_t cache_policy_src : 2; uint32_t padding : 29; - + } linear_copy_data; + struct lsdma_linear_sub_window_copy_data { uint32_t src_lo; uint32_t src_hi; + uint32_t dst_lo; uint32_t dst_hi; - } linear_copy_data; + + uint32_t src_x : 16; + uint32_t src_y : 16; + + uint32_t dst_x : 16; + uint32_t dst_y : 16; + + uint32_t rect_x : 16; + uint32_t rect_y : 16; + + uint32_t src_pitch : 16; + uint32_t dst_pitch : 16; + + uint32_t src_slice_pitch; + uint32_t dst_slice_pitch; + + uint32_t tmz : 1; + uint32_t element_size : 3; + uint32_t src_cache_policy : 3; + uint32_t dst_cache_policy : 3; + uint32_t reserved0 : 22; + } linear_sub_window_copy_data; struct lsdma_reg_write_data { uint32_t reg_addr; uint32_t reg_data; } reg_write_data; struct lsdma_pio_copy_data { + uint32_t src_lo; + uint32_t src_hi; + + uint32_t dst_lo; + uint32_t dst_hi; + union { struct { uint32_t byte_count : 26; @@ -2063,12 +2100,11 @@ struct dmub_cmd_lsdma_data { } fields; uint32_t raw; } packet; - uint32_t src_lo; - uint32_t src_hi; - uint32_t dst_lo; - uint32_t dst_hi; } pio_copy_data; struct lsdma_pio_constfill_data { + uint32_t dst_lo; + uint32_t dst_hi; + union { struct { uint32_t byte_count : 26; @@ -2081,14 +2117,12 @@ struct dmub_cmd_lsdma_data { } fields; uint32_t raw; } packet; - uint32_t dst_lo; - uint32_t dst_hi; + uint32_t data; } pio_constfill_data; uint32_t all[14]; } u; - }; struct dmub_rb_cmd_lsdma { @@ -2330,6 +2364,7 @@ struct dmub_cmd_fams2_global_config { union dmub_fams2_global_feature_config features; uint32_t recovery_timeout_us; uint32_t hwfq_flip_programming_delay_us; + uint32_t max_allow_to_target_delta_us; // how early DCN could assert P-State allow compared to the P-State target }; union dmub_cmd_fams2_config { @@ -3986,6 +4021,10 @@ enum dmub_cmd_replay_type { */ DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP = 8, /** + * Set version + */ + DMUB_CMD__REPLAY_SET_VERSION = 9, + /** * Set Replay General command. */ DMUB_CMD__REPLAY_SET_GENERAL_CMD = 16, @@ -4015,6 +4054,10 @@ struct dmub_alpm_auxless_data { uint16_t lfps_t1_t2_override_us; short lfps_t1_t2_offset_us; uint8_t lttpr_count; + /* + * Padding to align structure to 4 byte boundary. + */ + uint8_t pad[1]; }; /** @@ -4048,14 +4091,6 @@ struct dmub_cmd_replay_copy_settings_data { */ uint8_t digbe_inst; /** - * @hpo_stream_enc_inst: HPO stream encoder instance - */ - uint8_t hpo_stream_enc_inst; - /** - * @hpo_link_enc_inst: HPO link encoder instance - */ - uint8_t hpo_link_enc_inst; - /** * AUX HW instance. 
*/ uint8_t aux_inst; @@ -4099,13 +4134,77 @@ struct dmub_cmd_replay_copy_settings_data { * Use for AUX-less ALPM LFPS wake operation */ struct dmub_alpm_auxless_data auxless_alpm_data; - + /** + * @hpo_stream_enc_inst: HPO stream encoder instance + */ + uint8_t hpo_stream_enc_inst; + /** + * @hpo_link_enc_inst: HPO link encoder instance + */ + uint8_t hpo_link_enc_inst; /** * @pad: Align structure to 4 byte boundary. */ uint8_t pad[2]; }; + +/** + * Replay versions. + */ +enum replay_version { + /** + * FreeSync Replay + */ + REPLAY_VERSION_FREESYNC_REPLAY = 0, + /** + * Panel Replay + */ + REPLAY_VERSION_PANEL_REPLAY = 1, + /** + * Replay not supported. + */ + REPLAY_VERSION_UNSUPPORTED = 0xFF, +}; + +/** + * Data passed from driver to FW in a DMUB_CMD___SET_REPLAY_VERSION command. + */ +struct dmub_cmd_replay_set_version_data { + /** + * Panel Instance. + * Panel instance to identify which psr_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * PSR version that FW should implement. + */ + enum replay_version version; + /** + * PSR control version. + */ + uint8_t cmd_version; + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[2]; +}; + +/** + * Definition of a DMUB_CMD__REPLAY_SET_VERSION command. + */ +struct dmub_rb_cmd_replay_set_version { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_VERSION command. + */ + struct dmub_cmd_replay_set_version_data replay_set_version_data; +}; + /** * Definition of a DMUB_CMD__REPLAY_COPY_SETTINGS command. */ @@ -4159,18 +4258,6 @@ struct dmub_rb_cmd_replay_enable_data { * This does not support HDMI/DP2 for now. */ uint8_t phy_rate; - /** - * @hpo_stream_enc_inst: HPO stream encoder instance - */ - uint8_t hpo_stream_enc_inst; - /** - * @hpo_link_enc_inst: HPO link encoder instance - */ - uint8_t hpo_link_enc_inst; - /** - * @pad: Align structure to 4 byte boundary. - */ - uint8_t pad[2]; }; /** @@ -4470,6 +4557,10 @@ union dmub_replay_cmd_set { */ struct dmub_cmd_replay_disabled_adaptive_sync_sdp_data disabled_adaptive_sync_sdp_data; /** + * Definition of DMUB_CMD__REPLAY_SET_VERSION command data. + */ + struct dmub_cmd_replay_set_version_data version_data; + /** * Definition of DMUB_CMD__REPLAY_SET_GENERAL_CMD command data. 
*/ struct dmub_cmd_replay_set_general_cmd_data set_general_cmd_data; @@ -4685,21 +4776,25 @@ enum dmub_cmd_lsdma_type { */ DMUB_CMD__LSDMA_LINEAR_COPY = 1, /** + * LSDMA copies data from source to destination linearly in sub window + */ + DMUB_CMD__LSDMA_LINEAR_SUB_WINDOW_COPY = 2, + /** * Send the tiled-to-tiled copy command */ - DMUB_CMD__LSDMA_TILED_TO_TILED_COPY = 2, + DMUB_CMD__LSDMA_TILED_TO_TILED_COPY = 3, /** * Send the poll reg write command */ - DMUB_CMD__LSDMA_POLL_REG_WRITE = 3, + DMUB_CMD__LSDMA_POLL_REG_WRITE = 4, /** * Send the pio copy command */ - DMUB_CMD__LSDMA_PIO_COPY = 4, + DMUB_CMD__LSDMA_PIO_COPY = 5, /** * Send the pio constfill command */ - DMUB_CMD__LSDMA_PIO_CONSTFILL = 5, + DMUB_CMD__LSDMA_PIO_CONSTFILL = 6, }; struct abm_ace_curve { @@ -5914,6 +6009,9 @@ enum ips_residency_mode { IPS_RESIDENCY__IPS2, IPS_RESIDENCY__IPS1_RCG, IPS_RESIDENCY__IPS1_ONO2_ON, + IPS_RESIDENCY__IPS1_Z8_RETENTION, + IPS_RESIDENCY__PG_ONO_LAST_SEEN_IN_IPS, + IPS_RESIDENCY__PG_ONO_CURRENT_STATE }; #define NUM_IPS_HISTOGRAM_BUCKETS 16 @@ -5927,6 +6025,8 @@ struct dmub_ips_residency_info { uint32_t histogram[NUM_IPS_HISTOGRAM_BUCKETS]; uint64_t total_time_us; uint64_t total_inactive_time_us; + uint32_t ono_pg_state_at_collection; + uint32_t ono_pg_state_last_seen_in_ips; }; /** @@ -6223,6 +6323,10 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command. */ struct dmub_rb_cmd_idle_opt_set_dc_power_state idle_opt_set_dc_power_state; + /** + * Definition of a DMUB_CMD__REPLAY_SET_VERSION command. + */ + struct dmub_rb_cmd_replay_set_version replay_set_version; /* * Definition of a DMUB_CMD__REPLAY_COPY_SETTINGS command. */ @@ -6431,15 +6535,18 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool dmub_rb_push_front(struct dmub_rb *rb, const union dmub_rb_cmd *cmd) { - uint64_t volatile *dst = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->wrpt); - const uint64_t *src = (const uint64_t *)cmd; + uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt; + const uint8_t *src = (const uint8_t *)cmd; uint8_t i; + if (rb->capacity == 0) + return false; + if (dmub_rb_full(rb)) return false; // copying data - for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + for (i = 0; i < DMUB_RB_CMD_SIZE; i++) *dst++ = *src++; rb->wrpt += DMUB_RB_CMD_SIZE; @@ -6464,6 +6571,9 @@ static inline bool dmub_rb_out_push_front(struct dmub_rb *rb, uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt; const uint8_t *src = (const uint8_t *)cmd; + if (rb->capacity == 0) + return false; + if (dmub_rb_full(rb)) return false; @@ -6509,6 +6619,9 @@ static inline void dmub_rb_get_rptr_with_offset(struct dmub_rb *rb, uint32_t num_cmds, uint32_t *next_rptr) { + if (rb->capacity == 0) + return; + *next_rptr = rb->rptr + DMUB_RB_CMD_SIZE * num_cmds; if (*next_rptr >= rb->capacity) @@ -6572,6 +6685,9 @@ static inline bool dmub_rb_out_front(struct dmub_rb *rb, */ static inline bool dmub_rb_pop_front(struct dmub_rb *rb) { + if (rb->capacity == 0) + return false; + if (dmub_rb_empty(rb)) return false; @@ -6596,6 +6712,9 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint32_t rptr = rb->rptr; uint32_t wptr = rb->wrpt; + if (rb->capacity == 0) + return; + while (rptr != wptr) { uint64_t *data = (uint64_t *)((uint8_t *)(rb->base_address) + rptr); uint8_t i; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 3f38db752b84..4777c7203b2c 100644 --- 
a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -377,6 +377,7 @@ void dmub_dcn31_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu boot_options.bits.dpia_hpd_int_enable_supported = params->dpia_hpd_int_enable_supported; boot_options.bits.power_optimization = params->power_optimization; boot_options.bits.lower_hbr3_phy_ssc = params->lower_hbr3_phy_ssc; + boot_options.bits.override_hbr3_pll_vco = params->override_hbr3_pll_vco; boot_options.bits.sel_mux_phy_c_d_phy_f_g = (dmub->asic == DMUB_ASIC_DCN31B) ? 1 : 0; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 2228d62adc7e..834e5434ccb8 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -400,13 +400,14 @@ union dmub_fw_boot_options dmub_dcn35_get_fw_boot_option(struct dmub_srv *dmub) void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params) { union dmub_fw_boot_options boot_options = {0}; - union dmub_fw_boot_options cur_boot_options = {0}; - cur_boot_options = dmub_dcn35_get_fw_boot_option(dmub); + if (!dmub->dpia_supported) { + dmub->dpia_supported = dmub_dcn35_get_fw_boot_option(dmub).bits.enable_dpia; + } boot_options.bits.z10_disable = params->disable_z10; boot_options.bits.dpia_supported = params->dpia_supported; - boot_options.bits.enable_dpia = cur_boot_options.bits.enable_dpia && !params->disable_dpia; + boot_options.bits.enable_dpia = dmub->dpia_supported && !params->disable_dpia; boot_options.bits.usb4_cm_version = params->usb4_cm_version; boot_options.bits.dpia_hpd_int_enable_supported = params->dpia_hpd_int_enable_supported; boot_options.bits.power_optimization = params->power_optimization; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c index 567c5b1aeb7a..e7a58b140388 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c @@ -71,7 +71,7 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, switch (cmd.cmd_common.header.type) { case DMUB_OUT_CMD__DP_AUX_REPLY: notify->type = DMUB_NOTIFICATION_AUX_REPLY; - notify->link_index = cmd.dp_aux_reply.control.instance; + notify->instance = cmd.dp_aux_reply.control.instance; notify->result = cmd.dp_aux_reply.control.result; dmub_memcpy((void *)¬ify->aux_reply, (void *)&cmd.dp_aux_reply.reply_data, sizeof(struct aux_reply_data)); @@ -84,17 +84,17 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, notify->type = DMUB_NOTIFICATION_HPD_IRQ; } - notify->link_index = cmd.dp_hpd_notify.hpd_data.instance; + notify->instance = cmd.dp_hpd_notify.hpd_data.instance; notify->result = AUX_RET_SUCCESS; break; case DMUB_OUT_CMD__SET_CONFIG_REPLY: notify->type = DMUB_NOTIFICATION_SET_CONFIG_REPLY; - notify->link_index = cmd.set_config_reply.set_config_reply_control.instance; + notify->instance = cmd.set_config_reply.set_config_reply_control.instance; notify->sc_status = cmd.set_config_reply.set_config_reply_control.status; break; case DMUB_OUT_CMD__DPIA_NOTIFICATION: notify->type = DMUB_NOTIFICATION_DPIA_NOTIFICATION; - notify->link_index = cmd.dpia_notification.payload.header.instance; + notify->instance = cmd.dpia_notification.payload.header.instance; break; case DMUB_OUT_CMD__HPD_SENSE_NOTIFY: notify->type = DMUB_NOTIFICATION_HPD_SENSE_NOTIFY; diff --git 
a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 5fc29164e4b4..8aea50aa9533 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -213,6 +213,11 @@ enum { #endif #define DEVICE_ID_NV_13FE 0x13FE // CYAN_SKILLFISH #define DEVICE_ID_NV_143F 0x143F +#define DEVICE_ID_NV_13F9 0x13F9 +#define DEVICE_ID_NV_13FA 0x13FA +#define DEVICE_ID_NV_13FB 0x13FB +#define DEVICE_ID_NV_13FC 0x13FC +#define DEVICE_ID_NV_13DB 0x13DB #define FAMILY_VGH 144 #define DEVICE_ID_VGH_163F 0x163F #define DEVICE_ID_VGH_1435 0x1435 diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 71efd2770c99..ce421bcddcb0 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -226,8 +226,8 @@ static void update_v_total_for_static_ramp( unsigned int target_duration_in_us = calc_duration_in_us_from_refresh_in_uhz( in_out_vrr->fixed.target_refresh_in_uhz); - bool ramp_direction_is_up = (current_duration_in_us > - target_duration_in_us) ? true : false; + bool ramp_direction_is_up = current_duration_in_us > + target_duration_in_us; /* Calculate ratio between new and current frame duration with 3 digit */ unsigned int frame_duration_ratio = div64_u64(1000000, diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index 5e01c6e24cbc..c760216a6240 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -29,6 +29,7 @@ static void push_error_status(struct mod_hdcp *hdcp, enum mod_hdcp_status status) { struct mod_hdcp_trace *trace = &hdcp->connection.trace; + const uint8_t retry_limit = hdcp->connection.link.adjust.retry_limit; if (trace->error_count < MAX_NUM_OF_ERROR_TRACE) { trace->errors[trace->error_count].status = status; @@ -39,11 +40,11 @@ static void push_error_status(struct mod_hdcp *hdcp, if (is_hdcp1(hdcp)) { hdcp->connection.hdcp1_retry_count++; - if (hdcp->connection.hdcp1_retry_count == MAX_NUM_OF_ATTEMPTS) + if (hdcp->connection.hdcp1_retry_count == retry_limit) hdcp->connection.link.adjust.hdcp1.disable = 1; } else if (is_hdcp2(hdcp)) { hdcp->connection.hdcp2_retry_count++; - if (hdcp->connection.hdcp2_retry_count == MAX_NUM_OF_ATTEMPTS) + if (hdcp->connection.hdcp2_retry_count == retry_limit) hdcp->connection.link.adjust.hdcp2.disable = 1; } } diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index e58e7b93810b..6b7db8ec9a53 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -260,6 +260,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp) return MOD_HDCP_STATUS_FAILURE; } + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; mutex_lock(&psp->hdcp_context.mutex); diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index c42468bb70ac..b51ddf2846df 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -220,6 +220,7 @@ struct mod_hdcp_link_adjustment_hdcp2 { struct mod_hdcp_link_adjustment { uint8_t auth_delay; + uint8_t 
retry_limit; struct mod_hdcp_link_adjustment_hdcp1 hdcp1; struct mod_hdcp_link_adjustment_hdcp2 hdcp2; }; diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 5c86423c2e92..3d083010e734 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -211,7 +211,7 @@ atom_bios_string = "ATOM" }; */ -#pragma pack(1) /* BIOS data must use byte aligment*/ +#pragma pack(1) /* BIOS data must use byte alignment*/ enum atombios_image_offset{ OFFSET_TO_ATOM_ROM_HEADER_POINTER = 0x00000048, @@ -255,8 +255,8 @@ struct atom_rom_header_v2_2 uint16_t subsystem_vendor_id; uint16_t subsystem_id; uint16_t pci_info_offset; - uint16_t masterhwfunction_offset; //Offest for SW to get all command function offsets, Don't change the position - uint16_t masterdatatable_offset; //Offest for SW to get all data table offsets, Don't change the position + uint16_t masterhwfunction_offset; //Offset for SW to get all command function offsets, Don't change the position + uint16_t masterdatatable_offset; //Offset for SW to get all data table offsets, Don't change the position uint16_t reserved; uint32_t pspdirtableoffset; }; @@ -453,7 +453,7 @@ struct atom_dtd_format uint8_t refreshrate; }; -/* atom_dtd_format.modemiscinfo defintion */ +/* atom_dtd_format.modemiscinfo definition */ enum atom_dtd_format_modemiscinfo{ ATOM_HSYNC_POLARITY = 0x0002, ATOM_VSYNC_POLARITY = 0x0004, @@ -678,7 +678,7 @@ struct lcd_info_v2_1 uint32_t reserved1[8]; }; -/* lcd_info_v2_1.panel_misc defintion */ +/* lcd_info_v2_1.panel_misc definition */ enum atom_lcd_info_panel_misc{ ATOM_PANEL_MISC_FPDI =0x0002, }; @@ -716,7 +716,7 @@ enum atom_gpio_pin_assignment_gpio_id { /* gpio_id pre-define id for multiple usage */ /* GPIO use to control PCIE_VDDC in certain SLT board */ PCIE_VDDC_CONTROL_GPIO_PINID = 56, - /* if PP_AC_DC_SWITCH_GPIO_PINID in Gpio_Pin_LutTable, AC/DC swithing feature is enable */ + /* if PP_AC_DC_SWITCH_GPIO_PINID in Gpio_Pin_LutTable, AC/DC switching feature is enable */ PP_AC_DC_SWITCH_GPIO_PINID = 60, /* VDDC_REGULATOR_VRHOT_GPIO_PINID in Gpio_Pin_LutTable, VRHot feature is enable */ VDDC_VRHOT_GPIO_PINID = 61, @@ -734,7 +734,7 @@ enum atom_gpio_pin_assignment_gpio_id { struct atom_gpio_pin_lut_v2_1 { struct atom_common_table_header table_header; - /*the real number of this included in the structure is calcualted by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */ + /*the real number of this included in the structure is calculated by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */ struct atom_gpio_pin_assignment gpio_pin[]; }; @@ -997,7 +997,7 @@ enum atom_connector_layout_info_mini_type_def { enum atom_display_device_tag_def{ ATOM_DISPLAY_LCD1_SUPPORT = 0x0002, //an embedded display is either an LVDS or eDP signal type of display - ATOM_DISPLAY_LCD2_SUPPORT = 0x0020, //second edp device tag 0x0020 for backward compability + ATOM_DISPLAY_LCD2_SUPPORT = 0x0020, //second edp device tag 0x0020 for backward compatibility ATOM_DISPLAY_DFP1_SUPPORT = 0x0008, ATOM_DISPLAY_DFP2_SUPPORT = 0x0080, ATOM_DISPLAY_DFP3_SUPPORT = 0x0200, @@ -1011,7 +1011,7 @@ struct atom_display_object_path_v2 { uint16_t display_objid; //Connector Object ID or Misc Object ID uint16_t disp_recordoffset; - uint16_t encoderobjid; //first encoder closer to the connector, could be either an external or intenal encoder + uint16_t encoderobjid; //first encoder closer to the connector, could be either an 
external or internal encoder uint16_t extencoderobjid; //2nd encoder after the first encoder, from the connector point of view; uint16_t encoder_recordoffset; uint16_t extencoder_recordoffset; @@ -1023,7 +1023,7 @@ struct atom_display_object_path_v2 struct atom_display_object_path_v3 { uint16_t display_objid; //Connector Object ID or Misc Object ID uint16_t disp_recordoffset; - uint16_t encoderobjid; //first encoder closer to the connector, could be either an external or intenal encoder + uint16_t encoderobjid; //first encoder closer to the connector, could be either an external or internal encoder uint16_t reserved1; //only on USBC case, otherwise always = 0 uint16_t reserved2; //reserved and always = 0 uint16_t reserved3; //reserved and always = 0 @@ -3547,7 +3547,7 @@ struct atom_voltage_object_header_v4{ enum atom_voltage_object_mode { VOLTAGE_OBJ_GPIO_LUT = 0, //VOLTAGE and GPIO Lookup table ->atom_gpio_voltage_object_v4 - VOLTAGE_OBJ_VR_I2C_INIT_SEQ = 3, //VOLTAGE REGULATOR INIT sequece through I2C -> atom_i2c_voltage_object_v4 + VOLTAGE_OBJ_VR_I2C_INIT_SEQ = 3, //VOLTAGE REGULATOR INIT sequence through I2C -> atom_i2c_voltage_object_v4 VOLTAGE_OBJ_PHASE_LUT = 4, //Set Vregulator Phase lookup table ->atom_gpio_voltage_object_v4 VOLTAGE_OBJ_SVID2 = 7, //Indicate voltage control by SVID2 ->atom_svid2_voltage_object_v4 VOLTAGE_OBJ_EVV = 8, @@ -3585,7 +3585,7 @@ struct atom_gpio_voltage_object_v4 { struct atom_voltage_object_header_v4 header; // voltage mode = VOLTAGE_OBJ_GPIO_LUT or VOLTAGE_OBJ_PHASE_LUT uint8_t gpio_control_id; // default is 0 which indicate control through CG VID mode - uint8_t gpio_entry_num; // indiate the entry numbers of Votlage/Gpio value Look up table + uint8_t gpio_entry_num; // indicate the entry numbers of Votlage/Gpio value Look up table uint8_t phase_delay_us; // phase delay in unit of micro second uint8_t reserved; uint32_t gpio_mask_val; // GPIO Mask value @@ -4507,8 +4507,8 @@ struct amd_acpi_description_header{ struct uefi_acpi_vfct{ struct amd_acpi_description_header sheader; uint8_t tableUUID[16]; //0x24 - uint32_t vbiosimageoffset; //0x34. Offset to the first GOP_VBIOS_CONTENT block from the beginning of the stucture. - uint32_t lib1Imageoffset; //0x38. Offset to the first GOP_LIB1_CONTENT block from the beginning of the stucture. + uint32_t vbiosimageoffset; //0x34. Offset to the first GOP_VBIOS_CONTENT block from the beginning of the structure. + uint32_t lib1Imageoffset; //0x38. Offset to the first GOP_LIB1_CONTENT block from the beginning of the structure. 
uint32_t reserved[4]; //0x3C }; @@ -4540,7 +4540,7 @@ struct gop_lib1_content { /* *************************************************************************** Scratch Register definitions - Each number below indicates which scratch regiser request, Active and + Each number below indicates which scratch register request, Active and Connect all share the same definitions as display_device_tag defines *************************************************************************** */ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index e2b1ea7467b0..2f7e4b5bebf3 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -30,6 +30,12 @@ extern const struct amdgpu_ip_block_version smu_v12_0_ip_block; extern const struct amdgpu_ip_block_version smu_v13_0_ip_block; extern const struct amdgpu_ip_block_version smu_v14_0_ip_block; +enum smu_temp_metric_type { + SMU_TEMP_METRIC_BASEBOARD, + SMU_TEMP_METRIC_GPUBOARD, + SMU_TEMP_METRIC_MAX, +}; + enum smu_event_type { SMU_EVENT_RESET_COMPLETE = 0, }; @@ -496,6 +502,8 @@ struct amd_pm_funcs { int (*set_df_cstate)(void *handle, enum pp_df_cstate state); int (*set_xgmi_pstate)(void *handle, uint32_t pstate); ssize_t (*get_gpu_metrics)(void *handle, void **table); + ssize_t (*get_temp_metrics)(void *handle, enum smu_temp_metric_type type, void *table); + bool (*temp_metrics_is_supported)(void *handle, enum smu_temp_metric_type type); ssize_t (*get_xcp_metrics)(void *handle, int xcp_id, void *table); ssize_t (*get_pm_metrics)(void *handle, void *pmmetrics, size_t size); int (*set_watermarks_for_clock_ranges)(void *handle, @@ -1595,6 +1603,79 @@ struct amdgpu_pm_metrics { uint8_t data[]; }; +enum amdgpu_vr_temp { + AMDGPU_VDDCR_VDD0_TEMP, + AMDGPU_VDDCR_VDD1_TEMP, + AMDGPU_VDDCR_VDD2_TEMP, + AMDGPU_VDDCR_VDD3_TEMP, + AMDGPU_VDDCR_SOC_A_TEMP, + AMDGPU_VDDCR_SOC_C_TEMP, + AMDGPU_VDDCR_SOCIO_A_TEMP, + AMDGPU_VDDCR_SOCIO_C_TEMP, + AMDGPU_VDD_085_HBM_TEMP, + AMDGPU_VDDCR_11_HBM_B_TEMP, + AMDGPU_VDDCR_11_HBM_D_TEMP, + AMDGPU_VDD_USR_TEMP, + AMDGPU_VDDIO_11_E32_TEMP, + AMDGPU_VR_MAX_TEMP_ENTRIES, +}; + +enum amdgpu_system_temp { + AMDGPU_UBB_FPGA_TEMP, + AMDGPU_UBB_FRONT_TEMP, + AMDGPU_UBB_BACK_TEMP, + AMDGPU_UBB_OAM7_TEMP, + AMDGPU_UBB_IBC_TEMP, + AMDGPU_UBB_UFPGA_TEMP, + AMDGPU_UBB_OAM1_TEMP, + AMDGPU_OAM_0_1_HSC_TEMP, + AMDGPU_OAM_2_3_HSC_TEMP, + AMDGPU_OAM_4_5_HSC_TEMP, + AMDGPU_OAM_6_7_HSC_TEMP, + AMDGPU_UBB_FPGA_0V72_VR_TEMP, + AMDGPU_UBB_FPGA_3V3_VR_TEMP, + AMDGPU_RETIMER_0_1_2_3_1V2_VR_TEMP, + AMDGPU_RETIMER_4_5_6_7_1V2_VR_TEMP, + AMDGPU_RETIMER_0_1_0V9_VR_TEMP, + AMDGPU_RETIMER_4_5_0V9_VR_TEMP, + AMDGPU_RETIMER_2_3_0V9_VR_TEMP, + AMDGPU_RETIMER_6_7_0V9_VR_TEMP, + AMDGPU_OAM_0_1_2_3_3V3_VR_TEMP, + AMDGPU_OAM_4_5_6_7_3V3_VR_TEMP, + AMDGPU_IBC_HSC_TEMP, + AMDGPU_IBC_TEMP, + AMDGPU_SYSTEM_MAX_TEMP_ENTRIES = 32, +}; + +enum amdgpu_node_temp { + AMDGPU_RETIMER_X_TEMP, + AMDGPU_OAM_X_IBC_TEMP, + AMDGPU_OAM_X_IBC_2_TEMP, + AMDGPU_OAM_X_VDD18_VR_TEMP, + AMDGPU_OAM_X_04_HBM_B_VR_TEMP, + AMDGPU_OAM_X_04_HBM_D_VR_TEMP, + AMDGPU_NODE_MAX_TEMP_ENTRIES = 12, +}; + +struct amdgpu_gpuboard_temp_metrics_v1_0 { + struct metrics_table_header common_header; + uint16_t label_version; + uint16_t node_id; + uint64_t accumulation_counter; + /* Encoded temperature in Celsius, 24:31 is sensor id, 0:23 is temp value */ + uint32_t node_temp[AMDGPU_NODE_MAX_TEMP_ENTRIES]; + uint32_t vr_temp[AMDGPU_VR_MAX_TEMP_ENTRIES]; +}; + +struct amdgpu_baseboard_temp_metrics_v1_0 { +
struct metrics_table_header common_header; + uint16_t label_version; + uint16_t node_id; + uint64_t accumulation_counter; + /* Encoded temperature in Celsius, 24:31 is sensor id, 0:23 is temp value */ + uint32_t system_temp[AMDGPU_SYSTEM_MAX_TEMP_ENTRIES]; +}; + struct amdgpu_partition_metrics_v1_0 { struct metrics_table_header common_header; /* Current clocks (Mhz) */ diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index d85ffab2aff9..c04bd351b250 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -66,6 +66,7 @@ enum MES_SCH_API_OPCODE { MES_SCH_API_SET_SE_MODE = 17, MES_SCH_API_SET_GANG_SUBMIT = 18, MES_SCH_API_SET_HW_RSRC_1 = 19, + MES_SCH_API_INV_TLBS = 20, MES_SCH_API_MAX = 0xFF }; @@ -870,6 +871,35 @@ union MESAPI__SET_GANG_SUBMIT { uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; }; +/* + * @inv_sel 0-select pasid as input to do the invalidation, 1-select vmid + * @flush_type 0-old style, 1-lightweight, 2-heavyweight, 3-heavyweight2 + * @inv_sel_id specific pasid when inv_sel is 0 and specific vmid if inv_sel is 1 + * @hub_id 0-gc_hub, 1-mm_hub + */ +struct INV_TLBS { + uint8_t inv_sel; + uint8_t flush_type; + uint16_t inv_sel_id; + uint32_t hub_id; + /* If the following two inv_range settings are both 0, the whole VM will be invalidated, + * otherwise only the required range is invalidated + */ + uint64_t inv_range_va_start; + uint64_t inv_range_size; + uint64_t reserved; +}; + +union MESAPI__INV_TLBS { + struct { + union MES_API_HEADER header; + struct MES_API_STATUS api_status; + struct INV_TLBS invalidate_tlbs; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + #pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 71d986dd7a6e..518d07afc7df 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -764,10 +764,6 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev) ret = smu_send_rma_reason(smu); mutex_unlock(&adev->pm.mutex); - if (adev->cper.enabled) - if (amdgpu_cper_generate_bp_threshold_record(adev)) - dev_warn(adev->dev, "fail to generate bad page threshold cper records\n"); - return ret; } @@ -824,6 +820,21 @@ int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask) return ret; } +bool amdgpu_dpm_reset_vcn_is_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + bool ret; + + if (!is_support_sw_smu(adev)) + return false; + + mutex_lock(&adev->pm.mutex); + ret = smu_reset_vcn_is_supported(smu); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, @@ -2038,6 +2049,66 @@ int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev, } /** + * amdgpu_dpm_get_temp_metrics - Retrieve metrics for a specific temperature + * type + * @adev: Pointer to the device. + * @type: Identifier for the temperature type metrics to be fetched. + * @table: Pointer to a buffer where the metrics will be stored. If NULL, the + * function returns the size of the metrics structure. + * + * This function retrieves metrics for a specific temperature type. If the + * table parameter is NULL, the function returns the size of the metrics + * structure without populating it. + * + * Return: Size of the metrics structure on success, or a negative error code on failure.
+ */ +ssize_t amdgpu_dpm_get_temp_metrics(struct amdgpu_device *adev, + enum smu_temp_metric_type type, void *table) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret; + + if (!pp_funcs->get_temp_metrics || + !amdgpu_dpm_is_temp_metrics_supported(adev, type)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->get_temp_metrics(adev->powerplay.pp_handle, type, table); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +/** + * amdgpu_dpm_is_temp_metrics_supported - Check whether a specific temperature + * metrics type is supported + * @adev: Pointer to the device. + * @type: Identifier for the temperature type metrics to be fetched. + * + * This function reports whether the specified temperature metrics type is supported. + * + * Return: True if the metrics type is supported, false otherwise. + */ +bool amdgpu_dpm_is_temp_metrics_supported(struct amdgpu_device *adev, + enum smu_temp_metric_type type) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + bool support_temp_metrics = false; + + if (!pp_funcs->temp_metrics_is_supported) + return support_temp_metrics; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + support_temp_metrics = + pp_funcs->temp_metrics_is_supported(adev->powerplay.pp_handle, type); + mutex_unlock(&adev->pm.mutex); + } + + return support_temp_metrics; +} + +/** + * amdgpu_dpm_get_xcp_metrics - Retrieve metrics for a specific compute + * partition + * @adev: Pointer to the device. diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c index 42efe838fa85..2d2d2d5e6763 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c @@ -66,6 +66,13 @@ u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) (amdgpu_crtc->v_border * 2)); vblank_time_us = vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock; + + /* we have issues with mclk switching with + * refresh rates over 120 hz on the non-DC code. + */ + if (drm_mode_vrefresh(&amdgpu_crtc->hw_mode) > 120) + vblank_time_us = 0; + break; } } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 4b64851fdb42..96590c1da553 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -110,9 +110,10 @@ static int amdgpu_pm_dev_state_check(struct amdgpu_device *adev, bool runpm) bool runpm_check = runpm ? adev->in_runpm : false; if (amdgpu_in_reset(adev)) - return -EPERM; + return -EBUSY; + if (adev->in_suspend && !runpm_check) - return -EPERM; + return -EBUSY; return 0; } @@ -2073,6 +2074,134 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd return 0; } +/** + * DOC: board + * + * Certain SOCs can support reporting of various board attributes. This is useful + * for user applications to monitor various board related attributes. + * + * The amdgpu driver provides a sysfs API for reporting board attributes. Presently, + * only two types of attributes are reported: baseboard temperature and + * gpu board temperature. Both of them are reported as binary files. + * + * .. code-block:: console + * + * hexdump /sys/bus/pci/devices/.../board/baseboard_temp + * + * hexdump /sys/bus/pci/devices/.../board/gpuboard_temp + * + */ + +/** + * DOC: baseboard_temp + * + * The amdgpu driver provides a sysfs API for retrieving current baseboard + * temperature metrics data. The file baseboard_temp is used for this.
+ * Reading the file will dump all the current baseboard temperature metrics data. + */ +static ssize_t amdgpu_get_baseboard_temp_metrics(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size; + int ret; + + ret = amdgpu_pm_get_access_if_active(adev); + if (ret) + return ret; + + size = amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_BASEBOARD, NULL); + if (size <= 0) + goto out; + if (size >= PAGE_SIZE) { + ret = -ENOSPC; + goto out; + } + + amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_BASEBOARD, buf); + +out: + amdgpu_pm_put_access(adev); + + if (ret) + return ret; + + return size; +} + +/** + * DOC: gpuboard_temp + * + * The amdgpu driver provides a sysfs API for retrieving current gpuboard + * temperature metrics data. The file gpuboard_temp is used for this. + * Reading the file will dump all the current gpuboard temperature metrics data. + */ +static ssize_t amdgpu_get_gpuboard_temp_metrics(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size; + int ret; + + ret = amdgpu_pm_get_access_if_active(adev); + if (ret) + return ret; + + size = amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_GPUBOARD, NULL); + if (size <= 0) + goto out; + if (size >= PAGE_SIZE) { + ret = -ENOSPC; + goto out; + } + + amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_GPUBOARD, buf); + +out: + amdgpu_pm_put_access(adev); + + if (ret) + return ret; + + return size; +} + +static DEVICE_ATTR(baseboard_temp, 0444, amdgpu_get_baseboard_temp_metrics, NULL); +static DEVICE_ATTR(gpuboard_temp, 0444, amdgpu_get_gpuboard_temp_metrics, NULL); + +static struct attribute *board_attrs[] = { + &dev_attr_baseboard_temp.attr, + &dev_attr_gpuboard_temp.attr, + NULL +}; + +static umode_t amdgpu_board_attr_visible(struct kobject *kobj, struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (attr == &dev_attr_baseboard_temp.attr) { + if (!amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_BASEBOARD)) + return 0; + } + + if (attr == &dev_attr_gpuboard_temp.attr) { + if (!amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) + return 0; + } + + return attr->mode; +} + +const struct attribute_group amdgpu_board_attr_group = { + .name = "board", + .attrs = board_attrs, + .is_visible = amdgpu_board_attr_visible, +}; + /* pm policy attributes */ struct amdgpu_pm_policy_attr { struct device_attribute dev_attr; @@ -3458,14 +3587,16 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; /* not implemented yet for APUs other than GC 10.3.1 (vangogh) and 9.4.3 */ - if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)) && - (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4)))) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) - return 0; + if (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap.dev_attr.attr || + attr == 
&sensor_dev_attr_power1_cap_default.dev_attr.attr) { + if (adev->family == AMDGPU_FAMILY_SI || + ((adev->flags & AMD_IS_APU) && gc_ver != IP_VERSION(10, 3, 1) && + (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4))) || + (amdgpu_sriov_vf(adev) && gc_ver == IP_VERSION(11, 0, 3))) + return 0; + } /* not implemented yet for APUs having < GC 9.3.0 (Renoir) */ if (((adev->family == AMDGPU_FAMILY_SI) || @@ -4461,6 +4592,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) goto err_out0; } + if (amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) { + ret = devm_device_add_group(adev->dev, + &amdgpu_board_attr_group); + if (ret) + goto err_out0; + } + adev->pm.sysfs_initialized = true; return 0; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 768317ee1486..9748744133d9 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -526,6 +526,8 @@ int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev, int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table); ssize_t amdgpu_dpm_get_xcp_metrics(struct amdgpu_device *adev, int xcp_id, void *table); +ssize_t amdgpu_dpm_get_temp_metrics(struct amdgpu_device *adev, + enum smu_temp_metric_type type, void *table); /** * @get_pm_metrics: Get one snapshot of power management metrics from PMFW. The @@ -613,5 +615,8 @@ ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev); int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask); +bool amdgpu_dpm_reset_vcn_is_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_temp_metrics_supported(struct amdgpu_device *adev, + enum smu_temp_metric_type type); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index ea3ace882a10..52dbf6d0469d 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -771,8 +771,7 @@ static struct amdgpu_ps *amdgpu_dpm_pick_power_state(struct amdgpu_device *adev, int i; struct amdgpu_ps *ps; u32 ui_class; - bool single_display = (adev->pm.dpm.new_active_crtc_count < 2) ? - true : false; + bool single_display = adev->pm.dpm.new_active_crtc_count < 2; /* check if the vblank period is too short to adjust the mclk */ if (single_display && adev->powerplay.pp_funcs->vblank_too_short) { diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 52e732be59e3..6595a611ce6e 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3085,7 +3085,13 @@ static bool si_dpm_vblank_too_short(void *handle) /* we never hit the non-gddr5 limit so disable it */ u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; - if (vblank_time < switch_limit) + /* Consider zero vblank time too short and disable MCLK switching. + * Note that the vblank time is set to maximum when no displays are attached, + * so we'll still enable MCLK switching in that case. 
+ */ + if (vblank_time == 0) + return true; + else if (vblank_time < switch_limit) return true; else return false; @@ -3443,12 +3449,14 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, { struct si_ps *ps = si_get_ps(rps); struct amdgpu_clock_and_voltage_limits *max_limits; + struct amdgpu_connector *conn; bool disable_mclk_switching = false; bool disable_sclk_switching = false; u32 mclk, sclk; u16 vddc, vddci, min_vce_voltage = 0; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; u32 max_sclk = 0, max_mclk = 0; + u32 high_pixelclock_count = 0; int i; if (adev->asic_type == CHIP_HAINAN) { @@ -3476,6 +3484,35 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, } } + /* We define "high pixelclock" for SI as higher than necessary for 4K 30Hz. + * For example, 4K 60Hz and 1080p 144Hz fall into this category. + * Find number of such displays connected. + */ + for (i = 0; i < adev->mode_info.num_crtc; i++) { + if (!(adev->pm.dpm.new_active_crtcs & (1 << i)) || + !adev->mode_info.crtcs[i]->enabled) + continue; + + conn = to_amdgpu_connector(adev->mode_info.crtcs[i]->connector); + + if (conn->pixelclock_for_modeset > 297000) + high_pixelclock_count++; + } + + /* These are some ad-hoc fixes to some issues observed with SI GPUs. + * They are necessary because we don't have something like dce_calcs + * for these GPUs to calculate bandwidth requirements. + */ + if (high_pixelclock_count) { + /* On Oland, we observe some flickering when two 4K 60Hz + * displays are connected, possibly because voltage is too low. + * Raise the voltage by requiring a higher SCLK. + * (Voltage cannot be adjusted independently without also SCLK.) + */ + if (high_pixelclock_count > 1 && adev->asic_type == CHIP_OLAND) + disable_sclk_switching = true; + } + if (rps->vce_active) { rps->evclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].evclk; rps->ecclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].ecclk; @@ -5637,14 +5674,10 @@ static int si_populate_smc_t(struct amdgpu_device *adev, static int si_disable_ulv(struct amdgpu_device *adev) { - struct si_power_info *si_pi = si_get_pi(adev); - struct si_ulv_param *ulv = &si_pi->ulv; - - if (ulv->supported) - return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_DisableULV) == PPSMC_Result_OK) ? - 0 : -EINVAL; + PPSMC_Result r; - return 0; + r = amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_DisableULV); + return (r == PPSMC_Result_OK) ? 0 : -EINVAL; } static bool si_is_state_ulv_compatible(struct amdgpu_device *adev, @@ -5817,9 +5850,9 @@ static int si_upload_smc_data(struct amdgpu_device *adev) { struct amdgpu_crtc *amdgpu_crtc = NULL; int i; - - if (adev->pm.dpm.new_active_crtc_count == 0) - return 0; + u32 crtc_index = 0; + u32 mclk_change_block_cp_min = 0; + u32 mclk_change_block_cp_max = 0; for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->pm.dpm.new_active_crtcs & (1 << i)) { @@ -5828,26 +5861,31 @@ static int si_upload_smc_data(struct amdgpu_device *adev) } } - if (amdgpu_crtc == NULL) - return 0; + /* When a display is plugged in, program these so that the SMC + * performs MCLK switching when it doesn't cause flickering. + * When no display is plugged in, there is no need to restrict + * MCLK switching, so program them to zero. 
+ */ + if (adev->pm.dpm.new_active_crtc_count && amdgpu_crtc) { + crtc_index = amdgpu_crtc->crtc_id; - if (amdgpu_crtc->line_time <= 0) - return 0; + if (amdgpu_crtc->line_time) { + mclk_change_block_cp_min = 200 / amdgpu_crtc->line_time; + mclk_change_block_cp_max = 100 / amdgpu_crtc->line_time; + } + } - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_crtc_index, - amdgpu_crtc->crtc_id) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_crtc_index, + crtc_index); - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_mclk_change_block_cp_min, - amdgpu_crtc->wm_high / amdgpu_crtc->line_time) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_mclk_change_block_cp_min, + mclk_change_block_cp_min); - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_mclk_change_block_cp_max, - amdgpu_crtc->wm_low / amdgpu_crtc->line_time) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_mclk_change_block_cp_max, + mclk_change_block_cp_max); return 0; } @@ -7954,6 +7992,7 @@ static void si_dpm_print_power_state(void *handle, amdgpu_dpm_dbg_print_class_info(adev, rps->class, rps->class2); amdgpu_dpm_dbg_print_cap_info(adev, rps->caps); drm_dbg(adev_to_drm(adev), "\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + drm_dbg(adev_to_drm(adev), "\tvce evclk: %d ecclk: %d\n", rps->evclk, rps->ecclk); for (i = 0; i < ps->performance_level_count; i++) { pl = &ps->performance_levels[i]; drm_dbg(adev_to_drm(adev), "\t\tpower level %d sclk: %u mclk: %u vddc: %u vddci: %u pcie gen: %u\n", diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c index 4e65ab9e931c..281a5e377aee 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c @@ -172,20 +172,42 @@ PPSMC_Result amdgpu_si_send_msg_to_smc(struct amdgpu_device *adev, { u32 tmp; int i; + int usec_timeout; + + /* SMC seems to process some messages exceptionally slowly. */ + switch (msg) { + case PPSMC_MSG_NoForcedLevel: + case PPSMC_MSG_SetEnabledLevels: + case PPSMC_MSG_SetForcedLevels: + case PPSMC_MSG_DisableULV: + case PPSMC_MSG_SwitchToSwState: + usec_timeout = 1000000; /* 1 sec */ + break; + default: + usec_timeout = 200000; /* 200 ms */ + break; + } if (!amdgpu_si_is_smc_running(adev)) return PPSMC_Result_Failed; WREG32(mmSMC_MESSAGE_0, msg); - for (i = 0; i < adev->usec_timeout; i++) { + for (i = 0; i < usec_timeout; i++) { tmp = RREG32(mmSMC_RESP_0); if (tmp != 0) break; udelay(1); } - return (PPSMC_Result)RREG32(mmSMC_RESP_0); + tmp = RREG32(mmSMC_RESP_0); + if (tmp == 0) { + drm_warn(adev_to_drm(adev), + "%s timeout on message: %x (SMC_SCRATCH0: %x)\n", + __func__, msg, RREG32(mmSMC_SCRATCH0)); + } + + return (PPSMC_Result)tmp; } PPSMC_Result amdgpu_si_wait_for_smc_inactive(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index 8d40ed0f0e83..ce166a7f8e42 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -563,8 +563,8 @@ bool atomctrl_is_voltage_controlled_by_gpio_v3( PP_ASSERT_WITH_CODE((NULL != voltage_info), "Could not find Voltage Table in BIOS.", return false;); - ret = (NULL != atomctrl_lookup_voltage_type_v3 - (voltage_info, voltage_type, voltage_mode)) ? 
true : false; + ret = atomctrl_lookup_voltage_type_v3 + (voltage_info, voltage_type, voltage_mode) != NULL; return ret; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 9a821563bc8e..14ccd743ca1d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -1032,7 +1032,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, data->clock_vol_info.vdd_dep_on_fclk; uint32_t i, now, size = 0; uint32_t min_freq, max_freq = 0; - uint32_t ret = 0; + int ret = 0; switch (type) { case PP_SCLK: diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c index 5e43ad2b2956..d2dbd90bb427 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c @@ -2540,9 +2540,8 @@ static int fiji_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool fiji_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int fiji_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c index 17d2f5bff4a7..1f50f1e74c48 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c @@ -2655,9 +2655,8 @@ static int iceland_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool iceland_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } const struct pp_smumgr_func iceland_smu_funcs = { diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c index ff6b563ecbf5..bf6d09572cfc 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c @@ -2578,9 +2578,8 @@ static int polaris10_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool polaris10_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? 
true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int polaris10_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c index baf51cd82a35..0d4cbe4113a0 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c @@ -401,7 +401,7 @@ failed: int smu7_check_fw_load_finish(struct pp_hwmgr *hwmgr, uint32_t fw_type) { struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); - uint32_t ret; + int ret; ret = phm_wait_on_indirect_register(hwmgr, mmSMC_IND_INDEX_11, smu_data->soft_regs_start + smum_get_offsetof(hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c index 6fe6e6abb5d8..2e21f9d066cb 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c @@ -3139,9 +3139,8 @@ static int tonga_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool tonga_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int tonga_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index b47cb4a5f488..c5965924e7c6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -766,6 +766,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) case IP_VERSION(13, 0, 14): case IP_VERSION(13, 0, 12): smu_v13_0_6_set_ppt_funcs(smu); + smu_v13_0_6_set_temp_funcs(smu); /* Enable pp_od_clk_voltage node */ smu->od_enabled = true; break; @@ -3831,6 +3832,51 @@ int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, return ret; } +static ssize_t smu_sys_get_temp_metrics(void *handle, enum smu_temp_metric_type type, void *table) +{ + struct smu_context *smu = handle; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + enum smu_table_id table_id; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (!smu->smu_temp.temp_funcs || !smu->smu_temp.temp_funcs->get_temp_metrics) + return -EOPNOTSUPP; + + table_id = smu_metrics_get_temp_table_id(type); + + if (table_id == SMU_TABLE_COUNT) + return -EINVAL; + + /* If the request is to get size alone, return the cached table size */ + if (!table && tables[table_id].cache.size) + return tables[table_id].cache.size; + + if (smu_table_cache_is_valid(&tables[table_id])) { + memcpy(table, tables[table_id].cache.buffer, + tables[table_id].cache.size); + return tables[table_id].cache.size; + } + + return smu->smu_temp.temp_funcs->get_temp_metrics(smu, type, table); +} + +static bool smu_temp_metrics_is_supported(void *handle, enum smu_temp_metric_type type) +{ + struct smu_context *smu = handle; + bool ret = false; + + if (!smu->pm_enabled) + return false; + + if (smu->smu_temp.temp_funcs && smu->smu_temp.temp_funcs->temp_metrics_is_supported) + ret = smu->smu_temp.temp_funcs->temp_metrics_is_supported(smu, type); + + return ret; +} + static ssize_t smu_sys_get_xcp_metrics(void *handle, int 
xcp_id, void *table) { struct smu_context *smu = handle; @@ -3903,6 +3949,8 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .get_dpm_clock_table = smu_get_dpm_clock_table, .get_smu_prv_buf_details = smu_get_prv_buffer_details, .get_xcp_metrics = smu_sys_get_xcp_metrics, + .get_temp_metrics = smu_sys_get_temp_metrics, + .temp_metrics_is_supported = smu_temp_metrics_is_supported, }; int smu_wait_for_event(struct smu_context *smu, enum smu_event_type event, @@ -4076,6 +4124,16 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +bool smu_reset_vcn_is_supported(struct smu_context *smu) +{ + bool ret = false; + + if (smu->ppt_funcs && smu->ppt_funcs->reset_vcn_is_supported) + ret = smu->ppt_funcs->reset_vcn_is_supported(smu); + + return ret; +} + int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask) { if (smu->ppt_funcs && smu->ppt_funcs->dpm_reset_vcn) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index b52e194397e2..5976eda80035 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -249,6 +249,14 @@ struct smu_user_dpm_profile { tables[table_id].domain = d; \ } while (0) +struct smu_table_cache { + void *buffer; + size_t size; + /* interval in ms*/ + uint32_t interval; + unsigned long last_cache_time; +}; + struct smu_table { uint64_t size; uint32_t align; @@ -257,6 +265,7 @@ struct smu_table { void *cpu_addr; struct amdgpu_bo *bo; uint32_t version; + struct smu_table_cache cache; }; enum smu_perf_level_designation { @@ -322,6 +331,9 @@ enum smu_table_id { SMU_TABLE_ECCINFO, SMU_TABLE_COMBO_PPTABLE, SMU_TABLE_WIFIBAND, + SMU_TABLE_GPUBOARD_TEMP_METRICS, + SMU_TABLE_BASEBOARD_TEMP_METRICS, + SMU_TABLE_PMFW_SYSTEM_METRICS, SMU_TABLE_COUNT, }; @@ -396,6 +408,10 @@ struct smu_dpm_context { struct smu_dpm_policy_ctxt *dpm_policies; }; +struct smu_temp_context { + const struct smu_temp_funcs *temp_funcs; +}; + struct smu_power_gate { bool uvd_gated; bool vce_gated; @@ -529,6 +545,7 @@ struct smu_context { struct smu_table_context smu_table; struct smu_dpm_context smu_dpm; struct smu_power_context smu_power; + struct smu_temp_context smu_temp; struct smu_feature smu_feature; struct amd_pp_display_configuration *display_config; struct smu_baco_context smu_baco; @@ -624,6 +641,28 @@ struct smu_context { struct i2c_adapter; /** + * struct smu_temp_funcs - Callbacks used to get temperature data. + */ +struct smu_temp_funcs { + /** + * @get_temp_metrics: Calibrate voltage/frequency curve to fit the system's + * power delivery and voltage margins. Required for adaptive + * @type Temperature metrics type(baseboard/gpuboard) + * Return: Size of &table + */ + ssize_t (*get_temp_metrics)(struct smu_context *smu, + enum smu_temp_metric_type type, void *table); + + /** + * @temp_metrics_is_support: Get if specific temperature metrics is supported + * @type Temperature metrics type(baseboard/gpuboard) + * Return: true if supported else false + */ + bool (*temp_metrics_is_supported)(struct smu_context *smu, enum smu_temp_metric_type type); + +}; + +/** * struct pptable_funcs - Callbacks used to interact with the SMU. */ struct pptable_funcs { @@ -1397,6 +1436,10 @@ struct pptable_funcs { * @reset_vcn: message SMU to soft reset vcn instance. */ int (*dpm_reset_vcn)(struct smu_context *smu, uint32_t inst_mask); + /** + * @reset_vcn_is_supported: Check if support resets vcn. 
+ */ + bool (*reset_vcn_is_supported)(struct smu_context *smu); /** * @get_ecc_table: message SMU to get ECC INFO table. @@ -1622,6 +1665,71 @@ typedef struct { struct smu_dpm_policy *smu_get_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type); +static inline enum smu_table_id +smu_metrics_get_temp_table_id(enum smu_temp_metric_type type) +{ + switch (type) { + case SMU_TEMP_METRIC_BASEBOARD: + return SMU_TABLE_BASEBOARD_TEMP_METRICS; + case SMU_TEMP_METRIC_GPUBOARD: + return SMU_TABLE_GPUBOARD_TEMP_METRICS; + default: + return SMU_TABLE_COUNT; + } + + return SMU_TABLE_COUNT; +} + +static inline void smu_table_cache_update_time(struct smu_table *table, + unsigned long time) +{ + table->cache.last_cache_time = time; +} + +static inline bool smu_table_cache_is_valid(struct smu_table *table) +{ + if (!table->cache.buffer || !table->cache.last_cache_time || + !table->cache.interval || !table->cache.size || + time_after(jiffies, + table->cache.last_cache_time + + msecs_to_jiffies(table->cache.interval))) + return false; + + return true; +} + +static inline int smu_table_cache_init(struct smu_context *smu, + enum smu_table_id table_id, size_t size, + uint32_t cache_interval) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + + tables[table_id].cache.buffer = kzalloc(size, GFP_KERNEL); + if (!tables[table_id].cache.buffer) + return -ENOMEM; + + tables[table_id].cache.last_cache_time = 0; + tables[table_id].cache.interval = cache_interval; + tables[table_id].cache.size = size; + + return 0; +} + +static inline void smu_table_cache_fini(struct smu_context *smu, + enum smu_table_id table_id) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + + if (tables[table_id].cache.buffer) { + kfree(tables[table_id].cache.buffer); + tables[table_id].cache.buffer = NULL; + tables[table_id].cache.last_cache_time = 0; + tables[table_id].cache.interval = 0; + } +} + #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4) int smu_get_power_limit(void *handle, uint32_t *limit, @@ -1673,6 +1781,7 @@ int smu_send_rma_reason(struct smu_context *smu); int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); bool smu_reset_sdma_is_supported(struct smu_context *smu); int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask); +bool smu_reset_vcn_is_supported(struct smu_context *smu); int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, int level); ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h index 0a2ca544f4e3..1c407a8e96ee 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h @@ -135,7 +135,63 @@ typedef enum { GFX_DVM_MARGIN_COUNT } GFX_DVM_MARGIN_e; -#define SMU_METRICS_TABLE_VERSION 0x13 +typedef enum{ + SYSTEM_TEMP_UBB_FPGA, + SYSTEM_TEMP_UBB_FRONT, + SYSTEM_TEMP_UBB_BACK, + SYSTEM_TEMP_UBB_OAM7, + SYSTEM_TEMP_UBB_IBC, + SYSTEM_TEMP_UBB_UFPGA, + SYSTEM_TEMP_UBB_OAM1, + SYSTEM_TEMP_OAM_0_1_HSC, + SYSTEM_TEMP_OAM_2_3_HSC, + SYSTEM_TEMP_OAM_4_5_HSC, + SYSTEM_TEMP_OAM_6_7_HSC, + SYSTEM_TEMP_UBB_FPGA_0V72_VR, + SYSTEM_TEMP_UBB_FPGA_3V3_VR, + SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR, + SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR, + SYSTEM_TEMP_RETIMER_0_1_0V9_VR, + SYSTEM_TEMP_RETIMER_4_5_0V9_VR, + 
SYSTEM_TEMP_RETIMER_2_3_0V9_VR, + SYSTEM_TEMP_RETIMER_6_7_0V9_VR, + SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR, + SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR, + SYSTEM_TEMP_IBC_HSC, + SYSTEM_TEMP_IBC, + SYSTEM_TEMP_MAX_ENTRIES = 32 +} SYSTEM_TEMP_e; + +typedef enum{ + NODE_TEMP_RETIMER, + NODE_TEMP_IBC_TEMP, + NODE_TEMP_IBC_2_TEMP, + NODE_TEMP_VDD18_VR_TEMP, + NODE_TEMP_04_HBM_B_VR_TEMP, + NODE_TEMP_04_HBM_D_VR_TEMP, + NODE_TEMP_MAX_TEMP_ENTRIES = 12 +} NODE_TEMP_e; + +typedef enum { + SVI_VDDCR_VDD0_TEMP, + SVI_VDDCR_VDD1_TEMP, + SVI_VDDCR_VDD2_TEMP, + SVI_VDDCR_VDD3_TEMP, + SVI_VDDCR_SOC_A_TEMP, + SVI_VDDCR_SOC_C_TEMP, + SVI_VDDCR_SOCIO_A_TEMP, + SVI_VDDCR_SOCIO_C_TEMP, + SVI_VDD_085_HBM_TEMP, + SVI_VDDCR_11_HBM_B_TEMP, + SVI_VDDCR_11_HBM_D_TEMP, + SVI_VDD_USR_TEMP, + SVI_VDDIO_11_E32_TEMP, + SVI_MAX_TEMP_ENTRIES, // 13 +} SVI_TEMP_e; + +#define SMU_METRICS_TABLE_VERSION 0x14 + +#define SMU_SYSTEM_METRICS_TABLE_VERSION 0x0 typedef struct __attribute__((packed, aligned(4))) { uint64_t AccumulationCounter; @@ -231,11 +287,27 @@ typedef struct __attribute__((packed, aligned(4))) { uint64_t GfxclkBelowHostLimitThmAcc[8]; uint64_t GfxclkBelowHostLimitTotalAcc[8]; uint64_t GfxclkLowUtilizationAcc[8]; + + uint32_t AidTemperature[4]; + uint32_t XcdTemperature[8]; + uint32_t HbmTemperature[8]; } MetricsTable_t; #define SMU_VF_METRICS_TABLE_MASK (1 << 31) #define SMU_VF_METRICS_TABLE_VERSION (0x6 | SMU_VF_METRICS_TABLE_MASK) +#pragma pack(push, 4) +typedef struct { + uint64_t AccumulationCounter; // Last update timestamp + uint16_t LabelVersion; // Defaults to 0. + uint16_t NodeIdentifier; // Unique identifier to each node on system. + int16_t SystemTemperatures[SYSTEM_TEMP_MAX_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF + int16_t NodeTemperatures[NODE_TEMP_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF + int16_t VrTemperatures[SVI_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius + int16_t spare[3]; +} SystemMetricsTable_t; +#pragma pack(pop) + typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; uint32_t InstGfxclk_TargFreq; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h index e1f490b6ce64..aff2776a8b6f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h @@ -116,7 +116,11 @@ #define PPSMC_MSG_DumpErrorRecord 0x57 #define PPSMC_MSG_EraseRasTable 0x58 #define PPSMC_MSG_GetStaticMetricsTable 0x59 -#define PPSMC_Message_Count 0x5A +#define PPSMC_MSG_ResetVfArbitersByIndex 0x5A +#define PPSMC_MSG_GetBadPageSeverity 0x5B +#define PPSMC_MSG_GetSystemMetricsTable 0x5C +#define PPSMC_MSG_GetSystemMetricsVersion 0x5D +#define PPSMC_Message_Count 0x5E //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 41f268313613..63a088ef7169 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -94,9 +94,9 @@ #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SetThrottlingPolicy 0x44 #define PPSMC_MSG_ResetSDMA 0x4D -#define PPSMC_MSG_ResetVCN 0x4E #define PPSMC_MSG_GetStaticMetricsTable 0x59 -#define PPSMC_Message_Count 0x5A +#define 
PPSMC_MSG_ResetVCN 0x5B +#define PPSMC_Message_Count 0x5C //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index d7a9e41820fa..2256c77da636 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -278,7 +278,8 @@ __SMU_DUMMY_MAP(MALLPowerState), \ __SMU_DUMMY_MAP(ResetSDMA), \ __SMU_DUMMY_MAP(ResetVCN), \ - __SMU_DUMMY_MAP(GetStaticMetricsTable), + __SMU_DUMMY_MAP(GetStaticMetricsTable), \ + __SMU_DUMMY_MAP(GetSystemMetricsTable), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type @@ -469,6 +470,7 @@ enum smu_feature_mask { /* Message category flags */ #define SMU_MSG_VF_FLAG (1U << 0) #define SMU_MSG_RAS_PRI (1U << 1) +#define SMU_MSG_NO_PRECHECK (1U << 2) /* Firmware capability flags */ #define SMU_FW_CAP_RAS_PRI (1U << 0) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 9ad46f545d15..599eddb5a67d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1897,7 +1897,7 @@ static ssize_t arcturus_get_gpu_metrics(struct smu_context *smu, ret = smu_cmn_get_metrics_table(smu, &metrics, - true); + false); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index e97b0cf19197..3baf20f4c373 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -470,7 +470,7 @@ static int renoir_od_edit_dpm_table(struct smu_context *smu, static int renoir_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) { uint32_t min = 0, max = 0; - uint32_t ret = 0; + int ret = 0; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetMinGfxclkFrequency, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index c63d2e28954d..b067147b7c41 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1781,7 +1781,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, ret = smu_cmn_get_metrics_table(smu, &metrics, - true); + false); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 02a455a31c25..0bec12b348ce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -83,7 +83,6 @@ const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[SMU_FEATURE_COUNT] = SMU_13_0_12_FEA_MAP(SMU_FEATURE_PIT_BIT, FEATURE_PIT), }; -// clang-format off const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -106,7 +105,7 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), - MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI), + MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), 
MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0), MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), @@ -138,8 +137,55 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), + MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 0), }; +int smu_v13_0_12_tables_init(struct smu_context *smu) +{ + struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics; + struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + struct smu_table_cache *cache; + int ret; + + ret = smu_table_cache_init(smu, SMU_TABLE_PMFW_SYSTEM_METRICS, + smu_v13_0_12_get_system_metrics_size(), 5); + + if (ret) + return ret; + + ret = smu_table_cache_init(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS, + sizeof(*baseboard_temp_metrics), 50); + if (ret) + return ret; + /* Initialize base board temperature metrics */ + cache = &(tables[SMU_TABLE_BASEBOARD_TEMP_METRICS].cache); + baseboard_temp_metrics = + (struct amdgpu_baseboard_temp_metrics_v1_0 *) cache->buffer; + smu_cmn_init_baseboard_temp_metrics(baseboard_temp_metrics, 1, 0); + /* Initialize GPU board temperature metrics */ + ret = smu_table_cache_init(smu, SMU_TABLE_GPUBOARD_TEMP_METRICS, + sizeof(*gpuboard_temp_metrics), 50); + if (ret) { + smu_table_cache_fini(smu, SMU_TABLE_PMFW_SYSTEM_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS); + return ret; + } + cache = &(tables[SMU_TABLE_GPUBOARD_TEMP_METRICS].cache); + gpuboard_temp_metrics = (struct amdgpu_gpuboard_temp_metrics_v1_0 *)cache->buffer; + smu_cmn_init_gpuboard_temp_metrics(gpuboard_temp_metrics, 1, 0); + + return 0; +} + +void smu_v13_0_12_tables_fini(struct smu_context *smu) +{ + smu_table_cache_fini(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_GPUBOARD_TEMP_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_PMFW_SYSTEM_METRICS); +} + static int smu_v13_0_12_get_enabled_mask(struct smu_context *smu, uint64_t *feature_mask) { @@ -187,6 +233,11 @@ int smu_v13_0_12_get_max_metrics_size(void) return max(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t)); } +size_t smu_v13_0_12_get_system_metrics_size(void) +{ + return sizeof(SystemMetricsTable_t); +} + static void smu_v13_0_12_init_xgmi_data(struct smu_context *smu, StaticMetricsTable_t *static_metrics) { @@ -220,7 +271,7 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; uint32_t table_version; - int ret, i; + int ret, i, n; if (!pptable->Init) { ret = smu_v13_0_6_get_static_metrics_table(smu); @@ -259,6 +310,22 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) /* use AID0 serial number by default */ pptable->PublicSerialNumber_AID = static_metrics->PublicSerialNumber_AID[0]; + + amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC, + 0, pptable->PublicSerialNumber_AID); + n = ARRAY_SIZE(static_metrics->PublicSerialNumber_AID); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i, + static_metrics->PublicSerialNumber_AID[i]); + } + n = ARRAY_SIZE(static_metrics->PublicSerialNumber_XCD); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i, + 
static_metrics->PublicSerialNumber_XCD[i]); + } + ret = smu_v13_0_12_fru_get_product_info(smu, static_metrics); if (ret) return ret; @@ -359,6 +426,248 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, return 0; } +static int smu_v13_0_12_get_system_metrics_table(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *table = &smu_table->driver_table; + struct smu_table *tables = smu_table->tables; + struct smu_table *sys_table; + int ret; + + sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; + if (smu_table_cache_is_valid(sys_table)) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetSystemMetricsTable, NULL); + if (ret) { + dev_info(smu->adev->dev, + "Failed to export system metrics table!\n"); + return ret; + } + + amdgpu_asic_invalidate_hdp(smu->adev, NULL); + smu_table_cache_update_time(sys_table, jiffies); + memcpy(sys_table->cache.buffer, table->cpu_addr, + smu_v13_0_12_get_system_metrics_size()); + + return 0; +} + +static enum amdgpu_node_temp smu_v13_0_12_get_node_sensor_type(NODE_TEMP_e type) +{ + switch (type) { + case NODE_TEMP_RETIMER: + return AMDGPU_RETIMER_X_TEMP; + case NODE_TEMP_IBC_TEMP: + return AMDGPU_OAM_X_IBC_TEMP; + case NODE_TEMP_IBC_2_TEMP: + return AMDGPU_OAM_X_IBC_2_TEMP; + case NODE_TEMP_VDD18_VR_TEMP: + return AMDGPU_OAM_X_VDD18_VR_TEMP; + case NODE_TEMP_04_HBM_B_VR_TEMP: + return AMDGPU_OAM_X_04_HBM_B_VR_TEMP; + case NODE_TEMP_04_HBM_D_VR_TEMP: + return AMDGPU_OAM_X_04_HBM_D_VR_TEMP; + default: + return -EINVAL; + } +} + +static enum amdgpu_vr_temp smu_v13_0_12_get_vr_sensor_type(SVI_TEMP_e type) +{ + switch (type) { + case SVI_VDDCR_VDD0_TEMP: + return AMDGPU_VDDCR_VDD0_TEMP; + case SVI_VDDCR_VDD1_TEMP: + return AMDGPU_VDDCR_VDD1_TEMP; + case SVI_VDDCR_VDD2_TEMP: + return AMDGPU_VDDCR_VDD2_TEMP; + case SVI_VDDCR_VDD3_TEMP: + return AMDGPU_VDDCR_VDD3_TEMP; + case SVI_VDDCR_SOC_A_TEMP: + return AMDGPU_VDDCR_SOC_A_TEMP; + case SVI_VDDCR_SOC_C_TEMP: + return AMDGPU_VDDCR_SOC_C_TEMP; + case SVI_VDDCR_SOCIO_A_TEMP: + return AMDGPU_VDDCR_SOCIO_A_TEMP; + case SVI_VDDCR_SOCIO_C_TEMP: + return AMDGPU_VDDCR_SOCIO_C_TEMP; + case SVI_VDD_085_HBM_TEMP: + return AMDGPU_VDD_085_HBM_TEMP; + case SVI_VDDCR_11_HBM_B_TEMP: + return AMDGPU_VDDCR_11_HBM_B_TEMP; + case SVI_VDDCR_11_HBM_D_TEMP: + return AMDGPU_VDDCR_11_HBM_D_TEMP; + case SVI_VDD_USR_TEMP: + return AMDGPU_VDD_USR_TEMP; + case SVI_VDDIO_11_E32_TEMP: + return AMDGPU_VDDIO_11_E32_TEMP; + default: + return -EINVAL; + } +} + +static enum amdgpu_system_temp smu_v13_0_12_get_system_sensor_type(SYSTEM_TEMP_e type) +{ + switch (type) { + case SYSTEM_TEMP_UBB_FPGA: + return AMDGPU_UBB_FPGA_TEMP; + case SYSTEM_TEMP_UBB_FRONT: + return AMDGPU_UBB_FRONT_TEMP; + case SYSTEM_TEMP_UBB_BACK: + return AMDGPU_UBB_BACK_TEMP; + case SYSTEM_TEMP_UBB_OAM7: + return AMDGPU_UBB_OAM7_TEMP; + case SYSTEM_TEMP_UBB_IBC: + return AMDGPU_UBB_IBC_TEMP; + case SYSTEM_TEMP_UBB_UFPGA: + return AMDGPU_UBB_UFPGA_TEMP; + case SYSTEM_TEMP_UBB_OAM1: + return AMDGPU_UBB_OAM1_TEMP; + case SYSTEM_TEMP_OAM_0_1_HSC: + return AMDGPU_OAM_0_1_HSC_TEMP; + case SYSTEM_TEMP_OAM_2_3_HSC: + return AMDGPU_OAM_2_3_HSC_TEMP; + case SYSTEM_TEMP_OAM_4_5_HSC: + return AMDGPU_OAM_4_5_HSC_TEMP; + case SYSTEM_TEMP_OAM_6_7_HSC: + return AMDGPU_OAM_6_7_HSC_TEMP; + case SYSTEM_TEMP_UBB_FPGA_0V72_VR: + return AMDGPU_UBB_FPGA_0V72_VR_TEMP; + case SYSTEM_TEMP_UBB_FPGA_3V3_VR: + return AMDGPU_UBB_FPGA_3V3_VR_TEMP; + case SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR: + return AMDGPU_RETIMER_0_1_2_3_1V2_VR_TEMP; + case 
SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR: + return AMDGPU_RETIMER_4_5_6_7_1V2_VR_TEMP; + case SYSTEM_TEMP_RETIMER_0_1_0V9_VR: + return AMDGPU_RETIMER_0_1_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_4_5_0V9_VR: + return AMDGPU_RETIMER_4_5_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_2_3_0V9_VR: + return AMDGPU_RETIMER_2_3_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_6_7_0V9_VR: + return AMDGPU_RETIMER_6_7_0V9_VR_TEMP; + case SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR: + return AMDGPU_OAM_0_1_2_3_3V3_VR_TEMP; + case SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR: + return AMDGPU_OAM_4_5_6_7_3V3_VR_TEMP; + case SYSTEM_TEMP_IBC_HSC: + return AMDGPU_IBC_HSC_TEMP; + case SYSTEM_TEMP_IBC: + return AMDGPU_IBC_TEMP; + default: + return -EINVAL; + } +} + +static bool smu_v13_0_12_is_temp_metrics_supported(struct smu_context *smu, + enum smu_temp_metric_type type) +{ + switch (type) { + case SMU_TEMP_METRIC_BASEBOARD: + if (smu->adev->gmc.xgmi.physical_node_id == 0 && + smu->adev->gmc.xgmi.num_physical_nodes > 1 && + smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS))) + return true; + break; + case SMU_TEMP_METRIC_GPUBOARD: + return smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS)); + default: + break; + } + + return false; +} + +static ssize_t smu_v13_0_12_get_temp_metrics(struct smu_context *smu, + enum smu_temp_metric_type type, void *table) +{ + struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics; + struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + SystemMetricsTable_t *metrics; + struct smu_table *data_table; + struct smu_table *sys_table; + int ret, sensor_type; + u32 idx, sensors; + ssize_t size; + + if (type == SMU_TEMP_METRIC_BASEBOARD) { + /* Initialize base board temperature metrics */ + data_table = + &smu->smu_table.tables[SMU_TABLE_BASEBOARD_TEMP_METRICS]; + baseboard_temp_metrics = + (struct amdgpu_baseboard_temp_metrics_v1_0 *) + data_table->cache.buffer; + size = sizeof(*baseboard_temp_metrics); + } else { + data_table = + &smu->smu_table.tables[SMU_TABLE_GPUBOARD_TEMP_METRICS]; + gpuboard_temp_metrics = + (struct amdgpu_gpuboard_temp_metrics_v1_0 *) + data_table->cache.buffer; + size = sizeof(*baseboard_temp_metrics); + } + + ret = smu_v13_0_12_get_system_metrics_table(smu); + if (ret) + return ret; + + sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; + metrics = (SystemMetricsTable_t *)sys_table->cache.buffer; + smu_table_cache_update_time(data_table, jiffies); + + if (type == SMU_TEMP_METRIC_GPUBOARD) { + gpuboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter; + gpuboard_temp_metrics->label_version = metrics->LabelVersion; + gpuboard_temp_metrics->node_id = metrics->NodeIdentifier; + + idx = 0; + for (sensors = 0; sensors < NODE_TEMP_MAX_TEMP_ENTRIES; sensors++) { + if (metrics->NodeTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_node_sensor_type(sensors); + gpuboard_temp_metrics->node_temp[idx] = + ((int)metrics->NodeTemperatures[sensors]) & 0xFFFFFF; + gpuboard_temp_metrics->node_temp[idx] |= (sensor_type << 24); + idx++; + } + } + + idx = 0; + + for (sensors = 0; sensors < SVI_MAX_TEMP_ENTRIES; sensors++) { + if (metrics->VrTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_vr_sensor_type(sensors); + gpuboard_temp_metrics->vr_temp[idx] = + ((int)metrics->VrTemperatures[sensors]) & 0xFFFFFF; + gpuboard_temp_metrics->vr_temp[idx] |= (sensor_type << 24); + idx++; + } + } + } else if (type == SMU_TEMP_METRIC_BASEBOARD) { + 
baseboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter; + baseboard_temp_metrics->label_version = metrics->LabelVersion; + baseboard_temp_metrics->node_id = metrics->NodeIdentifier; + + idx = 0; + for (sensors = 0; sensors < SYSTEM_TEMP_MAX_ENTRIES; sensors++) { + if (metrics->SystemTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_system_sensor_type(sensors); + baseboard_temp_metrics->system_temp[idx] = + ((int)metrics->SystemTemperatures[sensors]) & 0xFFFFFF; + baseboard_temp_metrics->system_temp[idx] |= (sensor_type << 24); + idx++; + } + } + } + + memcpy(table, data_table->cache.buffer, size); + + return size; +} + ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics) { const u8 num_jpeg_rings = NUM_JPEG_RINGS_FW; @@ -572,3 +881,8 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void return sizeof(*gpu_metrics); } + +const struct smu_temp_funcs smu_v13_0_12_temp_funcs = { + .temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported, + .get_temp_metrics = smu_v13_0_12_get_temp_metrics, +}; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 9cc294f4708b..ebee659f8a1c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -145,7 +145,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), - MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI), + MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0), MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), @@ -177,7 +177,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), - MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 0), + MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), }; // clang-format on @@ -312,6 +312,8 @@ static void smu_v13_0_14_init_caps(struct smu_context *smu) smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); if (fw_ver >= 0x5551200) smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); + if (fw_ver >= 0x5551800) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); if (fw_ver >= 0x5551600) { smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); @@ -350,6 +352,13 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } + + if (fw_ver >= 0x04560700) { + if (!amdgpu_sriov_vf(smu->adev)) + smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); + } else { + smu_v13_0_12_tables_fini(smu); + } } static void smu_v13_0_6_init_caps(struct smu_context *smu) @@ -402,19 +411,37 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) if ((pgm == 7 && fw_ver >= 0x7550E00) || (pgm == 0 && fw_ver >= 0x00557E00)) smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); - if ((pgm == 0 && fw_ver >= 0x00557F01) || - (pgm == 7 
&& fw_ver >= 0x7551000)) { - smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); - smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); + + if (amdgpu_sriov_vf(adev)) { + if ((pgm == 0 && fw_ver >= 0x00558000) || + (pgm == 7 && fw_ver >= 0x7551000)) { + smu_v13_0_6_cap_set(smu, + SMU_CAP(STATIC_METRICS)); + smu_v13_0_6_cap_set(smu, + SMU_CAP(BOARD_VOLTAGE)); + smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); + } + } else { + if ((pgm == 0 && fw_ver >= 0x00557F01) || + (pgm == 7 && fw_ver >= 0x7551000)) { + smu_v13_0_6_cap_set(smu, + SMU_CAP(STATIC_METRICS)); + smu_v13_0_6_cap_set(smu, + SMU_CAP(BOARD_VOLTAGE)); + } + if ((pgm == 0 && fw_ver >= 0x00558000) || + (pgm == 7 && fw_ver >= 0x7551000)) + smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } - if ((pgm == 0 && fw_ver >= 0x00558000) || - (pgm == 7 && fw_ver >= 0x7551000)) - smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } if (((pgm == 7) && (fw_ver >= 0x7550700)) || ((pgm == 0) && (fw_ver >= 0x00557900)) || ((pgm == 4) && (fw_ver >= 0x4557000))) smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); + + if (((pgm == 0) && (fw_ver >= 0x00558200)) || + ((pgm == 4) && (fw_ver >= 0x04557100))) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); } static void smu_v13_0_x_init_caps(struct smu_context *smu) @@ -511,8 +538,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; + void *gpu_metrics_table __free(kfree) = NULL; + void *driver_pptable __free(kfree) = NULL; + void *metrics_table __free(kfree) = NULL; struct amdgpu_device *adev = smu->adev; int gpu_metrcs_size = METRICS_TABLE_SIZE; + int ret; if (!(adev->flags & AMD_IS_APU)) SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE, @@ -528,27 +559,36 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); - if (!smu_table->metrics_table) + SMU_TABLE_INIT(tables, SMU_TABLE_PMFW_SYSTEM_METRICS, + smu_v13_0_12_get_system_metrics_size(), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); + + metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); + if (!metrics_table) return -ENOMEM; smu_table->metrics_time = 0; smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_8); - smu_table->gpu_metrics_table = + gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); - if (!smu_table->gpu_metrics_table) { - kfree(smu_table->metrics_table); + if (!gpu_metrics_table) return -ENOMEM; - } - smu_table->driver_pptable = - kzalloc(sizeof(struct PPTable_t), GFP_KERNEL); - if (!smu_table->driver_pptable) { - kfree(smu_table->metrics_table); - kfree(smu_table->gpu_metrics_table); + driver_pptable = kzalloc(sizeof(struct PPTable_t), GFP_KERNEL); + if (!driver_pptable) return -ENOMEM; + + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 12)) { + ret = smu_v13_0_12_tables_init(smu); + if (ret) + return ret; } + smu_table->gpu_metrics_table = no_free_ptr(gpu_metrics_table); + smu_table->metrics_table = no_free_ptr(metrics_table); + smu_table->driver_pptable = no_free_ptr(driver_pptable); + return 0; } @@ -677,6 +717,13 @@ static int smu_v13_0_6_init_smc_tables(struct smu_context *smu) return ret; } +static int smu_v13_0_6_fini_smc_tables(struct smu_context *smu) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) + smu_v13_0_12_tables_fini(smu); + return smu_v13_0_fini_smc_tables(smu); 
+} + static int smu_v13_0_6_get_allowed_feature_mask(struct smu_context *smu, uint32_t *feature_mask, uint32_t num) @@ -803,7 +850,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; int version = smu_v13_0_6_get_metrics_version(smu); - int ret, i, retry = 100; + int ret, i, retry = 100, n; uint32_t table_version; uint16_t max_speed; uint8_t max_width; @@ -865,6 +912,23 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0]; + amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC, + 0, pptable->PublicSerialNumber_AID); + n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_AID); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i, + GET_METRIC_FIELD(PublicSerialNumber_AID, + version)[i]); + } + n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_XCD); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i, + GET_METRIC_FIELD(PublicSerialNumber_XCD, + version)[i]); + } + pptable->Init = true; if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { ret = smu_v13_0_6_get_static_metrics_table(smu); @@ -2560,9 +2624,9 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, const u8 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; int version = smu_v13_0_6_get_metrics_version(smu); struct amdgpu_partition_metrics_v1_0 *xcp_metrics; + MetricsTableV0_t *metrics_v0 __free(kfree) = NULL; struct amdgpu_device *adev = smu->adev; int ret, inst, i, j, k, idx; - MetricsTableV0_t *metrics_v0; MetricsTableV1_t *metrics_v1; MetricsTableV2_t *metrics_v2; struct amdgpu_xcp *xcp; @@ -2587,17 +2651,14 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, return -ENOMEM; ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); - if (ret) { - kfree(metrics_v0); + if (ret) return ret; - } if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && - smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { - ret = smu_v13_0_12_get_xcp_metrics(smu, xcp, table, metrics_v0); - goto out; - } + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + return smu_v13_0_12_get_xcp_metrics(smu, xcp, table, + metrics_v0); metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; @@ -2668,8 +2729,6 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, idx++; } } -out: - kfree(metrics_v0); return sizeof(*xcp_metrics); } @@ -2680,31 +2739,26 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table struct gpu_metrics_v1_8 *gpu_metrics = (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int version = smu_v13_0_6_get_metrics_version(smu); + MetricsTableV0_t *metrics_v0 __free(kfree) = NULL; int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; - MetricsTableV0_t *metrics_v0; MetricsTableV1_t *metrics_v1; MetricsTableV2_t *metrics_v2; struct amdgpu_xcp *xcp; u16 link_width_level; - ssize_t num_bytes; u8 num_jpeg_rings; u32 inst_mask; bool per_inst; metrics_v0 = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); - if (ret) { - kfree(metrics_v0); + if (ret) return ret; - } - if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && - smu_v13_0_6_cap_supported(smu, 
SMU_CAP(STATIC_METRICS))) { - num_bytes = smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0); - kfree(metrics_v0); - return num_bytes; - } + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + return smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0); metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; @@ -2890,7 +2944,6 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); *table = (void *)gpu_metrics; - kfree(metrics_v0); return sizeof(*gpu_metrics); } @@ -3076,7 +3129,7 @@ static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int var = (adev->pdev->device & 0xF); - if (var == 0x1) + if (var == 0x0 || var == 0x1 || var == 0x3) return true; return false; @@ -3152,6 +3205,11 @@ static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +static bool smu_v13_0_6_reset_vcn_is_supported(struct smu_context *smu) +{ + return smu_v13_0_6_cap_supported(smu, SMU_CAP(VCN_RESET)); +} + static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) { int ret = 0; @@ -3797,7 +3855,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .init_microcode = smu_v13_0_6_init_microcode, .fini_microcode = smu_v13_0_fini_microcode, .init_smc_tables = smu_v13_0_6_init_smc_tables, - .fini_smc_tables = smu_v13_0_fini_smc_tables, + .fini_smc_tables = smu_v13_0_6_fini_smc_tables, .init_power = smu_v13_0_init_power, .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_6_check_fw_status, @@ -3840,6 +3898,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .reset_sdma = smu_v13_0_6_reset_sdma, .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported, .dpm_reset_vcn = smu_v13_0_6_reset_vcn, + .reset_vcn_is_supported = smu_v13_0_6_reset_vcn_is_supported, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) @@ -3857,3 +3916,9 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs); amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs); } + +void smu_v13_0_6_set_temp_funcs(struct smu_context *smu) +{ + smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) + == IP_VERSION(13, 0, 12)) ? 
&smu_v13_0_12_temp_funcs : NULL; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index 67b30674fd31..aae9a546a67e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -64,14 +64,17 @@ enum smu_v13_0_6_caps { SMU_CAP(RMA_MSG), SMU_CAP(ACA_SYND), SMU_CAP(SDMA_RESET), + SMU_CAP(VCN_RESET), SMU_CAP(STATIC_METRICS), SMU_CAP(HST_LIMIT_METRICS), SMU_CAP(BOARD_VOLTAGE), SMU_CAP(PLDM_VERSION), + SMU_CAP(TEMP_METRICS), SMU_CAP(ALL), }; extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); +extern void smu_v13_0_6_set_temp_funcs(struct smu_context *smu); bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap); int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu); int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, @@ -79,6 +82,7 @@ int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, bool smu_v13_0_12_is_dpm_running(struct smu_context *smu); int smu_v13_0_12_get_max_metrics_size(void); +size_t smu_v13_0_12_get_system_metrics_size(void); int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu); int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value); @@ -86,6 +90,9 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics); +int smu_v13_0_12_tables_init(struct smu_context *smu); +void smu_v13_0_12_tables_fini(struct smu_context *smu); extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[]; extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[]; +extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs; #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 3aea32baea3d..f32474af90b3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1697,9 +1697,11 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *min_power_limit) { struct smu_table_context *table_context = &smu->smu_table; + struct smu_14_0_2_powerplay_table *powerplay_table = + table_context->power_play_table; PPTable_t *pptable = table_context->driver_pptable; CustomSkuTable_t *skutable = &pptable->CustomSkuTable; - uint32_t power_limit; + uint32_t power_limit, od_percent_upper = 0, od_percent_lower = 0; uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v14_0_get_current_power_limit(smu, &power_limit)) @@ -1712,11 +1714,29 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (max_power_limit) - *max_power_limit = msg_limit; + if (powerplay_table) { + if (smu->od_enabled && + smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = pptable->SkuTable.OverDriveLimitsBasicMax.Ppt; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } else if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = 0; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } + } + + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, 
od_percent_lower, power_limit); + + if (max_power_limit) { + *max_power_limit = msg_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } - if (min_power_limit) - *min_power_limit = 0; + if (min_power_limit) { + *min_power_limit = power_limit * (100 + od_percent_lower); + *min_power_limit /= 100; + } return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 59f9abd0f7b8..f532f7c69259 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -256,11 +256,12 @@ static int __smu_cmn_ras_filter_msg(struct smu_context *smu, { struct amdgpu_device *adev = smu->adev; uint32_t flags, resp; - bool fed_status; + bool fed_status, pri; flags = __smu_cmn_get_msg_flags(smu, msg); *poll = true; + pri = !!(flags & SMU_MSG_NO_PRECHECK); /* When there is RAS fatal error, FW won't process non-RAS priority * messages. Don't allow any messages other than RAS priority messages. */ @@ -272,15 +273,18 @@ static int __smu_cmn_ras_filter_msg(struct smu_context *smu, smu_get_message_name(smu, msg)); return -EACCES; } + } + if (pri || fed_status) { /* FW will ignore non-priority messages when a RAS fatal error - * is detected. Hence it is possible that a previous message - * wouldn't have got response. Allow to continue without polling - * for response status for priority messages. + * or reset condition is detected. Hence it is possible that a + * previous message wouldn't have got response. Allow to + * continue without polling for response status for priority + * messages. */ resp = RREG32(smu->resp_reg); dev_dbg(adev->dev, - "Sending RAS priority message %s response status: %x", + "Sending priority message %s response status: %x", smu_get_message_name(smu, msg), resp); if (resp == 0) *poll = false; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index a608cdbdada4..d588f74b98de 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -65,6 +65,32 @@ header->structure_size = sizeof(*tmp); \ } while (0) +#define smu_cmn_init_baseboard_temp_metrics(ptr, fr, cr) \ + do { \ + typecheck(struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *, \ + (ptr)); \ + struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \ + struct metrics_table_header *header = \ + (struct metrics_table_header *)tmp; \ + memset(header, 0xFF, sizeof(*tmp)); \ + header->format_revision = fr; \ + header->content_revision = cr; \ + header->structure_size = sizeof(*tmp); \ + } while (0) + +#define smu_cmn_init_gpuboard_temp_metrics(ptr, fr, cr) \ + do { \ + typecheck(struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *, \ + (ptr)); \ + struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \ + struct metrics_table_header *header = \ + (struct metrics_table_header *)tmp; \ + memset(header, 0xFF, sizeof(*tmp)); \ + header->format_revision = fr; \ + header->content_revision = cr; \ + header->structure_size = sizeof(*tmp); \ + } while (0) + extern const int link_speed[]; /* Helper to Convert from PCIE Gen 1/2/3/4/5/6 to 0.1 GT/s speed units */ diff --git a/drivers/gpu/drm/ast/ast_2100.c b/drivers/gpu/drm/ast/ast_2100.c index 477ee15eff5d..829e3b8b0d19 100644 --- a/drivers/gpu/drm/ast/ast_2100.c +++ b/drivers/gpu/drm/ast/ast_2100.c @@ -32,6 +32,39 @@ #include "ast_post.h" /* + * DRAM type + */ + +static enum ast_dram_layout ast_2100_get_dram_layout_p2a(struct ast_device *ast) +{ + u32 mcr_cfg; + enum ast_dram_layout dram_layout; + + ast_write32(ast, 
0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + mcr_cfg = ast_read32(ast, 0x10004); + + switch (mcr_cfg & 0x0c) { + case 0: + case 4: + default: + dram_layout = AST_DRAM_512Mx16; + break; + case 8: + if (mcr_cfg & 0x40) + dram_layout = AST_DRAM_1Gx16; + else + dram_layout = AST_DRAM_512Mx32; + break; + case 0xc: + dram_layout = AST_DRAM_1Gx32; + break; + } + + return dram_layout; +} + +/* * POST */ @@ -266,6 +299,7 @@ static void ast_post_chip_2100(struct ast_device *ast) u8 j; u32 data, temp, i; const struct ast_dramstruct *dram_reg_info; + enum ast_dram_layout dram_layout = ast_2100_get_dram_layout_p2a(ast); j = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); @@ -292,11 +326,17 @@ static void ast_post_chip_2100(struct ast_device *ast) for (i = 0; i < 15; i++) udelay(dram_reg_info->data); } else if (AST_DRAMSTRUCT_IS(dram_reg_info, DRAM_TYPE)) { - data = dram_reg_info->data; - if (ast->dram_type == AST_DRAM_1Gx16) + switch (dram_layout) { + case AST_DRAM_1Gx16: data = 0x00000d89; - else if (ast->dram_type == AST_DRAM_1Gx32) + break; + case AST_DRAM_1Gx32: data = 0x00000c8d; + break; + default: + data = dram_reg_info->data; + break; + } temp = ast_read32(ast, 0x12070); temp &= 0xc; diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index e37a55295ed7..c15aef014f69 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -98,13 +98,15 @@ enum ast_config_mode { ast_use_defaults }; -#define AST_DRAM_512Mx16 0 -#define AST_DRAM_1Gx16 1 -#define AST_DRAM_512Mx32 2 -#define AST_DRAM_1Gx32 3 -#define AST_DRAM_2Gx16 6 -#define AST_DRAM_4Gx16 7 -#define AST_DRAM_8Gx16 8 +enum ast_dram_layout { + AST_DRAM_512Mx16 = 0, + AST_DRAM_1Gx16 = 1, + AST_DRAM_512Mx32 = 2, + AST_DRAM_1Gx32 = 3, + AST_DRAM_2Gx16 = 6, + AST_DRAM_4Gx16 = 7, + AST_DRAM_8Gx16 = 8, +}; /* * Hardware cursor @@ -172,10 +174,6 @@ struct ast_device { enum ast_config_mode config_mode; enum ast_chip chip; - uint32_t dram_bus_width; - uint32_t dram_type; - uint32_t mclk; - void __iomem *vram; unsigned long vram_base; unsigned long vram_size; diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 44b9b5f659fc..3eea6a6cdacd 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -210,126 +210,6 @@ static void ast_detect_tx_chip(struct ast_device *ast, bool need_post) drm_info(dev, "Using %s\n", info_str[ast->tx_chip]); } -static int ast_get_dram_info(struct ast_device *ast) -{ - struct drm_device *dev = &ast->base; - struct device_node *np = dev->dev->of_node; - uint32_t mcr_cfg, mcr_scu_mpll, mcr_scu_strap; - uint32_t denum, num, div, ref_pll, dsel; - - switch (ast->config_mode) { - case ast_use_dt: - /* - * If some properties are missing, use reasonable - * defaults for GEN5 - */ - if (of_property_read_u32(np, "aspeed,mcr-configuration", - &mcr_cfg)) - mcr_cfg = 0x00000577; - if (of_property_read_u32(np, "aspeed,mcr-scu-mpll", - &mcr_scu_mpll)) - mcr_scu_mpll = 0x000050C0; - if (of_property_read_u32(np, "aspeed,mcr-scu-strap", - &mcr_scu_strap)) - mcr_scu_strap = 0; - break; - case ast_use_p2a: - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - mcr_cfg = ast_read32(ast, 0x10004); - mcr_scu_mpll = ast_read32(ast, 0x10120); - mcr_scu_strap = ast_read32(ast, 0x10170); - break; - case ast_use_defaults: - default: - ast->dram_bus_width = 16; - ast->dram_type = AST_DRAM_1Gx16; - if (IS_AST_GEN6(ast)) - ast->mclk = 800; - else - ast->mclk = 396; - return 0; - } - - if (mcr_cfg & 0x40) - 
ast->dram_bus_width = 16; - else - ast->dram_bus_width = 32; - - if (IS_AST_GEN6(ast)) { - switch (mcr_cfg & 0x03) { - case 0: - ast->dram_type = AST_DRAM_1Gx16; - break; - default: - case 1: - ast->dram_type = AST_DRAM_2Gx16; - break; - case 2: - ast->dram_type = AST_DRAM_4Gx16; - break; - case 3: - ast->dram_type = AST_DRAM_8Gx16; - break; - } - } else if (IS_AST_GEN4(ast) || IS_AST_GEN5(ast)) { - switch (mcr_cfg & 0x03) { - case 0: - ast->dram_type = AST_DRAM_512Mx16; - break; - default: - case 1: - ast->dram_type = AST_DRAM_1Gx16; - break; - case 2: - ast->dram_type = AST_DRAM_2Gx16; - break; - case 3: - ast->dram_type = AST_DRAM_4Gx16; - break; - } - } else { - switch (mcr_cfg & 0x0c) { - case 0: - case 4: - ast->dram_type = AST_DRAM_512Mx16; - break; - case 8: - if (mcr_cfg & 0x40) - ast->dram_type = AST_DRAM_1Gx16; - else - ast->dram_type = AST_DRAM_512Mx32; - break; - case 0xc: - ast->dram_type = AST_DRAM_1Gx32; - break; - } - } - - if (mcr_scu_strap & 0x2000) - ref_pll = 14318; - else - ref_pll = 12000; - - denum = mcr_scu_mpll & 0x1f; - num = (mcr_scu_mpll & 0x3fe0) >> 5; - dsel = (mcr_scu_mpll & 0xc000) >> 14; - switch (dsel) { - case 3: - div = 0x4; - break; - case 2: - case 1: - div = 0x2; - break; - default: - div = 0x1; - break; - } - ast->mclk = ref_pll * (num + 2) / ((denum + 2) * (div * 1000)); - return 0; -} - struct drm_device *ast_device_create(struct pci_dev *pdev, const struct drm_driver *drv, enum ast_chip chip, @@ -352,12 +232,6 @@ struct drm_device *ast_device_create(struct pci_dev *pdev, ast->regs = regs; ast->ioregs = ioregs; - ret = ast_get_dram_info(ast); - if (ret) - return ERR_PTR(ret); - drm_info(dev, "dram MCLK=%u Mhz type=%d bus_width=%d\n", - ast->mclk, ast->dram_type, ast->dram_bus_width); - ast_detect_tx_chip(ast, need_post); switch (ast->tx_chip) { case AST_TX_ASTDP: diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 6945029b3592..a250afd8d662 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -120,8 +120,7 @@ config DRM_ITE_IT6505 select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select EXTCON - select CRYPTO - select CRYPTO_HASH + select CRYPTO_LIB_SHA1 select REGMAP_I2C help ITE IT6505 DisplayPort bridge chip driver. 
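[Illustrative sketch, not part of the patch] The DRM_ITE_IT6505 Kconfig hunk above swaps the generic CRYPTO/CRYPTO_HASH (crypto_shash) plumbing for the one-shot CRYPTO_LIB_SHA1 helper that ite-it6505.c switches to further down in this diff. A minimal sketch of that library call follows; the wrapper and buffer names are made up for the example, and the prototype is assumed to be the usual one from <crypto/sha1.h>:

#include <crypto/sha1.h>
#include <linux/types.h>

/* Hypothetical helper, for illustration only: hash a KSV list the way
 * ite-it6505.c now does, using the sha1() library call instead of the
 * crypto_alloc_shash("sha1")/crypto_shash_digest() boilerplate it removes. */
static void example_sha1_ksv_list(const u8 *ksv_list, size_t len,
				  u8 digest[SHA1_DIGEST_SIZE])
{
	/* One synchronous call, no allocation or error path needed. */
	sha1(ksv_list, len, digest);
}

With the library helper selected at Kconfig level, the driver no longer has to manage a crypto_shash transform at runtime, which is why the later hunk can delete it6505_sha1_digest() entirely.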
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index 85ebead9809c..8be7266fd4f4 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -195,13 +195,14 @@ #define ADV7511_I2S_IEC958_DIRECT 3 #define ADV7511_PACKET(p, x) ((p) * 0x20 + (x)) -#define ADV7511_PACKET_SDP(x) ADV7511_PACKET(0, x) +#define ADV7511_PACKET_SPD(x) ADV7511_PACKET(0, x) #define ADV7511_PACKET_MPEG(x) ADV7511_PACKET(1, x) #define ADV7511_PACKET_ACP(x) ADV7511_PACKET(2, x) #define ADV7511_PACKET_ISRC1(x) ADV7511_PACKET(3, x) #define ADV7511_PACKET_ISRC2(x) ADV7511_PACKET(4, x) #define ADV7511_PACKET_GM(x) ADV7511_PACKET(5, x) -#define ADV7511_PACKET_SPARE(x) ADV7511_PACKET(6, x) +#define ADV7511_PACKET_SPARE1(x) ADV7511_PACKET(6, x) +#define ADV7511_PACKET_SPARE2(x) ADV7511_PACKET(7, x) #define ADV7511_REG_CEC_TX_FRAME_HDR 0x00 #define ADV7511_REG_CEC_TX_FRAME_DATA0 0x01 @@ -348,6 +349,7 @@ struct adv7511 { struct i2c_client *i2c_cec; struct regmap *regmap; + struct regmap *regmap_packet; struct regmap *regmap_cec; enum drm_connector_status status; bool powered; diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c index 766b1c96bc88..87e7e820810a 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c @@ -12,6 +12,8 @@ #include <sound/soc.h> #include <linux/of_graph.h> +#include <drm/display/drm_hdmi_state_helper.h> + #include "adv7511.h" static void adv7511_calc_cts_n(unsigned int f_tmds, unsigned int fs, @@ -155,17 +157,8 @@ int adv7511_hdmi_audio_prepare(struct drm_bridge *bridge, regmap_update_bits(adv7511->regmap, ADV7511_REG_I2C_FREQ_ID_CFG, ADV7511_I2C_FREQ_ID_CFG_RATE_MASK, rate << 4); - /* send current Audio infoframe values while updating */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, - BIT(5), BIT(5)); - - regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME(0), 0x1); - - /* use Audio infoframe updated info */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, - BIT(5), 0); - - return 0; + return drm_atomic_helper_connector_hdmi_update_audio_infoframe(connector, + &hparms->cea); } int adv7511_hdmi_audio_startup(struct drm_bridge *bridge, @@ -188,15 +181,9 @@ int adv7511_hdmi_audio_startup(struct drm_bridge *bridge, /* not copyrighted */ regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CFG1, BIT(5), BIT(5)); - /* enable audio infoframes */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_PACKET_ENABLE1, - BIT(3), BIT(3)); /* AV mute disable */ regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(0), BIT(7) | BIT(6), BIT(7)); - /* use Audio infoframe updated info */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, - BIT(5), 0); /* enable SPDIF receiver */ if (adv7511->audio_source == ADV7511_AUDIO_SOURCE_SPDIF) @@ -214,4 +201,6 @@ void adv7511_hdmi_audio_shutdown(struct drm_bridge *bridge, if (adv7511->audio_source == ADV7511_AUDIO_SOURCE_SPDIF) regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG, BIT(7), 0); + + drm_atomic_helper_connector_hdmi_clear_audio_infoframe(connector); } diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 00d6417c177b..b9be86541307 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -132,6 +132,13 @@ static const struct regmap_config adv7511_regmap_config = { .volatile_reg = 
adv7511_register_volatile, }; +static const struct regmap_config adv7511_packet_config = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = 0xff, +}; + /* ----------------------------------------------------------------------------- * Hardware configuration */ @@ -886,9 +893,18 @@ static int adv7511_bridge_hdmi_clear_infoframe(struct drm_bridge *bridge, struct adv7511 *adv7511 = bridge_to_adv7511(bridge); switch (type) { + case HDMI_INFOFRAME_TYPE_AUDIO: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_AUDIO_INFOFRAME); + break; case HDMI_INFOFRAME_TYPE_AVI: adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_AVI_INFOFRAME); break; + case HDMI_INFOFRAME_TYPE_SPD: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_SPD); + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_SPARE1); + break; default: drm_dbg_driver(adv7511->bridge.dev, "Unsupported HDMI InfoFrame %x\n", type); break; @@ -903,16 +919,52 @@ static int adv7511_bridge_hdmi_write_infoframe(struct drm_bridge *bridge, { struct adv7511 *adv7511 = bridge_to_adv7511(bridge); - adv7511_bridge_hdmi_clear_infoframe(bridge, type); - switch (type) { + case HDMI_INFOFRAME_TYPE_AUDIO: + /* send current Audio infoframe values while updating */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), BIT(5)); + + /* The Audio infoframe id is not configurable */ + regmap_bulk_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME_VERSION, + buffer + 1, len - 1); + + /* use Audio infoframe updated info */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), 0); + + adv7511_packet_enable(adv7511, ADV7511_PACKET_ENABLE_AUDIO_INFOFRAME); + break; case HDMI_INFOFRAME_TYPE_AVI: + /* send current AVI infoframe values while updating */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(6), BIT(6)); + /* The AVI infoframe id is not configurable */ regmap_bulk_write(adv7511->regmap, ADV7511_REG_AVI_INFOFRAME_VERSION, buffer + 1, len - 1); + regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME_LENGTH, 0x2); + regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME(1), 0x1); + + /* use AVI infoframe updated info */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(6), 0); + adv7511_packet_enable(adv7511, ADV7511_PACKET_ENABLE_AVI_INFOFRAME); break; + case HDMI_INFOFRAME_TYPE_SPD: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_SPD); + regmap_bulk_write(adv7511->regmap_packet, ADV7511_PACKET_SPD(0), + buffer, len); + adv7511_packet_enable(adv7511, ADV7511_PACKET_ENABLE_SPD); + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_SPARE1); + regmap_bulk_write(adv7511->regmap_packet, ADV7511_PACKET_SPARE1(0), + buffer, len); + adv7511_packet_enable(adv7511, ADV7511_PACKET_ENABLE_SPARE1); + break; default: drm_dbg_driver(adv7511->bridge.dev, "Unsupported HDMI InfoFrame %x\n", type); break; @@ -1242,6 +1294,13 @@ static int adv7511_probe(struct i2c_client *i2c) goto err_i2c_unregister_edid; } + adv7511->regmap_packet = devm_regmap_init_i2c(adv7511->i2c_packet, + &adv7511_packet_config); + if (IS_ERR(adv7511->regmap_packet)) { + ret = PTR_ERR(adv7511->regmap_packet); + goto err_i2c_unregister_packet; + } + regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR, adv7511->i2c_packet->addr << 1); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index ed35e567d117..efe534977d12 100644 
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1474,8 +1474,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) dp = devm_drm_bridge_alloc(dev, struct analogix_dp_device, bridge, &analogix_dp_bridge_funcs); - if (!dp) - return ERR_PTR(-ENOMEM); + if (IS_ERR(dp)) + return ERR_CAST(dp); dp->dev = &pdev->dev; dp->dpms_mode = DRM_MODE_DPMS_OFF; diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index cced81633ddc..f1d8a8a151d8 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -6,6 +6,7 @@ config DRM_CDNS_DSI select DRM_PANEL_BRIDGE select GENERIC_PHY select GENERIC_PHY_MIPI_DPHY + select VIDEOMODE_HELPERS depends on OF help Support Cadence DPI to DSI bridge. This is an internal diff --git a/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c b/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c index bea8346515b8..8f7a0d46601a 100644 --- a/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c @@ -492,14 +492,12 @@ static int imx93_dsi_get_phy_configure_opts(struct imx93_dsi *dsi, static enum drm_mode_status imx93_dsi_validate_mode(struct imx93_dsi *dsi, const struct drm_display_mode *mode) { - struct drm_bridge *bridge = dw_mipi_dsi_get_bridge(dsi->dmd); + struct drm_bridge *dmd_bridge = dw_mipi_dsi_get_bridge(dsi->dmd); + struct drm_bridge *last_bridge __free(drm_bridge_put) = + drm_bridge_chain_get_last_bridge(dmd_bridge->encoder); - /* Get the last bridge */ - while (drm_bridge_get_next_bridge(bridge)) - bridge = drm_bridge_get_next_bridge(bridge); - - if ((bridge->ops & DRM_BRIDGE_OP_DETECT) && - (bridge->ops & DRM_BRIDGE_OP_EDID)) { + if ((last_bridge->ops & DRM_BRIDGE_OP_DETECT) && + (last_bridge->ops & DRM_BRIDGE_OP_EDID)) { unsigned long pixel_clock_rate = mode->clock * 1000; unsigned long rounded_rate; diff --git a/drivers/gpu/drm/bridge/ite-it6263.c b/drivers/gpu/drm/bridge/ite-it6263.c index cf813672b4ff..2eb8fba7016c 100644 --- a/drivers/gpu/drm/bridge/ite-it6263.c +++ b/drivers/gpu/drm/bridge/ite-it6263.c @@ -146,6 +146,7 @@ #define HDMI_COLOR_DEPTH_24 FIELD_PREP(HDMI_COLOR_DEPTH, 4) #define HDMI_REG_PKT_GENERAL_CTRL 0xc6 +#define HDMI_REG_PKT_NULL_CTRL 0xc9 #define HDMI_REG_AVI_INFOFRM_CTRL 0xcd #define ENABLE_PKT BIT(0) #define REPEAT_PKT BIT(1) @@ -154,6 +155,12 @@ * 3) HDMI register bank1: 0x130 ~ 0x1ff (HDMI packet registers) */ +/* NULL packet registers */ +/* Header Byte(HB): n = 0 ~ 2 */ +#define HDMI_REG_PKT_HB(n) (0x138 + (n)) +/* Packet Byte(PB): n = 0 ~ 27(HDMI_MAX_INFOFRAME_SIZE), n = 0 for checksum */ +#define HDMI_REG_PKT_PB(n) (0x13b + (n)) + /* AVI packet registers */ #define HDMI_REG_AVI_DB1 0x158 #define HDMI_REG_AVI_DB2 0x159 @@ -224,7 +231,9 @@ static bool it6263_hdmi_writeable_reg(struct device *dev, unsigned int reg) case HDMI_REG_HDMI_MODE: case HDMI_REG_GCP: case HDMI_REG_PKT_GENERAL_CTRL: + case HDMI_REG_PKT_NULL_CTRL: case HDMI_REG_AVI_INFOFRM_CTRL: + case HDMI_REG_PKT_HB(0) ... 
HDMI_REG_PKT_PB(HDMI_MAX_INFOFRAME_SIZE): case HDMI_REG_AVI_DB1: case HDMI_REG_AVI_DB2: case HDMI_REG_AVI_DB3: @@ -755,10 +764,16 @@ static int it6263_hdmi_clear_infoframe(struct drm_bridge *bridge, { struct it6263 *it = bridge_to_it6263(bridge); - if (type == HDMI_INFOFRAME_TYPE_AVI) + switch (type) { + case HDMI_INFOFRAME_TYPE_AVI: regmap_write(it->hdmi_regmap, HDMI_REG_AVI_INFOFRM_CTRL, 0); - else + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + regmap_write(it->hdmi_regmap, HDMI_REG_PKT_NULL_CTRL, 0); + break; + default: dev_dbg(it->dev, "unsupported HDMI infoframe 0x%x\n", type); + } return 0; } @@ -770,27 +785,36 @@ static int it6263_hdmi_write_infoframe(struct drm_bridge *bridge, struct it6263 *it = bridge_to_it6263(bridge); struct regmap *regmap = it->hdmi_regmap; - if (type != HDMI_INFOFRAME_TYPE_AVI) { + switch (type) { + case HDMI_INFOFRAME_TYPE_AVI: + /* write the first AVI infoframe data byte chunk(DB1-DB5) */ + regmap_bulk_write(regmap, HDMI_REG_AVI_DB1, + &buffer[HDMI_INFOFRAME_HEADER_SIZE], + HDMI_AVI_DB_CHUNK1_SIZE); + + /* write the second AVI infoframe data byte chunk(DB6-DB13) */ + regmap_bulk_write(regmap, HDMI_REG_AVI_DB6, + &buffer[HDMI_INFOFRAME_HEADER_SIZE + + HDMI_AVI_DB_CHUNK1_SIZE], + HDMI_AVI_DB_CHUNK2_SIZE); + + /* write checksum */ + regmap_write(regmap, HDMI_REG_AVI_CSUM, buffer[3]); + + regmap_write(regmap, HDMI_REG_AVI_INFOFRM_CTRL, + ENABLE_PKT | REPEAT_PKT); + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + /* write header and payload */ + regmap_bulk_write(regmap, HDMI_REG_PKT_HB(0), buffer, len); + + regmap_write(regmap, HDMI_REG_PKT_NULL_CTRL, + ENABLE_PKT | REPEAT_PKT); + break; + default: dev_dbg(it->dev, "unsupported HDMI infoframe 0x%x\n", type); - return 0; } - /* write the first AVI infoframe data byte chunk(DB1-DB5) */ - regmap_bulk_write(regmap, HDMI_REG_AVI_DB1, - &buffer[HDMI_INFOFRAME_HEADER_SIZE], - HDMI_AVI_DB_CHUNK1_SIZE); - - /* write the second AVI infoframe data byte chunk(DB6-DB13) */ - regmap_bulk_write(regmap, HDMI_REG_AVI_DB6, - &buffer[HDMI_INFOFRAME_HEADER_SIZE + - HDMI_AVI_DB_CHUNK1_SIZE], - HDMI_AVI_DB_CHUNK2_SIZE); - - /* write checksum */ - regmap_write(regmap, HDMI_REG_AVI_CSUM, buffer[3]); - - regmap_write(regmap, HDMI_REG_AVI_INFOFRM_CTRL, ENABLE_PKT | REPEAT_PKT); - return 0; } diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c index 89649c17ffad..a094803ba7aa 100644 --- a/drivers/gpu/drm/bridge/ite-it6505.c +++ b/drivers/gpu/drm/bridge/ite-it6505.c @@ -21,7 +21,7 @@ #include <linux/wait.h> #include <linux/bitfield.h> -#include <crypto/hash.h> +#include <crypto/sha1.h> #include <drm/display/drm_dp_helper.h> #include <drm/display/drm_hdcp_helper.h> @@ -2107,35 +2107,6 @@ static void it6505_hdcp_part1_auth(struct it6505 *it6505) it6505->hdcp_status = HDCP_AUTH_GOING; } -static int it6505_sha1_digest(struct it6505 *it6505, u8 *sha1_input, - unsigned int size, u8 *output_av) -{ - struct shash_desc *desc; - struct crypto_shash *tfm; - int err; - struct device *dev = it6505->dev; - - tfm = crypto_alloc_shash("sha1", 0, 0); - if (IS_ERR(tfm)) { - dev_err(dev, "crypto_alloc_shash sha1 failed"); - return PTR_ERR(tfm); - } - desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL); - if (!desc) { - crypto_free_shash(tfm); - return -ENOMEM; - } - - desc->tfm = tfm; - err = crypto_shash_digest(desc, sha1_input, size, output_av); - if (err) - dev_err(dev, "crypto_shash_digest sha1 failed"); - - crypto_free_shash(tfm); - kfree(desc); - return err; -} - static int 
it6505_setup_sha1_input(struct it6505 *it6505, u8 *sha1_input) { struct device *dev = it6505->dev; @@ -2205,7 +2176,7 @@ static bool it6505_hdcp_part2_ksvlist_check(struct it6505 *it6505) return false; } - it6505_sha1_digest(it6505, it6505->sha1_input, i, (u8 *)av); + sha1(it6505->sha1_input, i, (u8 *)av); /*1B-05 V' must retry 3 times */ for (retry = 0; retry < 3; retry++) { err = it6505_get_dpcd(it6505, DP_AUX_HDCP_V_PRIME(0), (u8 *)bv, diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index b5dd71f6a990..eabc4c32f6ab 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -31,11 +31,10 @@ /* returns true iff both arguments logically differs */ #define NEQV(a, b) (!(a) ^ !(b)) -/* DSIM_STATUS */ +/* DSIM_STATUS or DSIM_DPHY_STATUS */ #define DSIM_STOP_STATE_DAT(x) (((x) & 0xf) << 0) #define DSIM_STOP_STATE_CLK BIT(8) #define DSIM_TX_READY_HS_CLK BIT(10) -#define DSIM_PLL_STABLE BIT(31) /* DSIM_SWRST */ #define DSIM_FUNCRST BIT(16) @@ -46,17 +45,13 @@ #define DSIM_BTA_TIMEOUT(x) ((x) << 16) /* DSIM_CLKCTRL */ -#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0) -#define DSIM_ESC_PRESCALER_MASK (0xffff << 0) -#define DSIM_LANE_ESC_CLK_EN_CLK BIT(19) -#define DSIM_LANE_ESC_CLK_EN_DATA(x) (((x) & 0xf) << 20) -#define DSIM_LANE_ESC_CLK_EN_DATA_MASK (0xf << 20) -#define DSIM_BYTE_CLKEN BIT(24) -#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25) -#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25) -#define DSIM_PLL_BYPASS BIT(27) -#define DSIM_ESC_CLKEN BIT(28) -#define DSIM_TX_REQUEST_HSCLK BIT(31) +#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0) +#define DSIM_ESC_PRESCALER_MASK (0xffff << 0) +#define DSIM_LANE_ESC_CLK_EN_DATA(x, offset) (((x) & 0xf) << offset) +#define DSIM_LANE_ESC_CLK_EN_DATA_MASK(offset) (0xf << offset) +#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25) +#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25) +#define DSIM_PLL_BYPASS BIT(27) /* DSIM_CONFIG */ #define DSIM_LANE_EN_CLK BIT(0) @@ -91,7 +86,6 @@ */ #define DSIM_HSE_DISABLE_MODE BIT(23) #define DSIM_AUTO_MODE BIT(24) -#define DSIM_VIDEO_MODE BIT(25) #define DSIM_BURST_MODE BIT(26) #define DSIM_SYNC_INFORM BIT(27) #define DSIM_EOT_DISABLE BIT(28) @@ -129,9 +123,9 @@ #define DSIM_MAIN_HBP_MASK ((0xffff) << 0) /* DSIM_MSYNC */ -#define DSIM_MAIN_VSA(x) ((x) << 22) +#define DSIM_MAIN_VSA(x, offset) ((x) << offset) #define DSIM_MAIN_HSA(x) ((x) << 0) -#define DSIM_MAIN_VSA_MASK ((0x3ff) << 22) +#define DSIM_MAIN_VSA_MASK(offset) ((0x3ff) << offset) #define DSIM_MAIN_HSA_MASK ((0xffff) << 0) /* DSIM_SDRESOL */ @@ -157,6 +151,11 @@ #define DSIM_INT_RX_ECC_ERR BIT(15) #define DSIM_INT_RX_CRC_ERR BIT(14) +/* DSIM_SFRCTRL */ +#define DSIM_SFR_CTRL_STAND_BY BIT(4) +#define DSIM_SFR_CTRL_SHADOW_UPDATE BIT(1) +#define DSIM_SFR_CTRL_SHADOW_EN BIT(0) + /* DSIM_FIFOCTRL */ #define DSIM_RX_DATA_FULL BIT(25) #define DSIM_RX_DATA_EMPTY BIT(24) @@ -191,9 +190,7 @@ #define DSIM_PLL_DPDNSWAP_DAT (1 << 24) #define DSIM_FREQ_BAND(x) ((x) << 24) #define DSIM_PLL_EN BIT(23) -#define DSIM_PLL_P(x, offset) ((x) << (offset)) -#define DSIM_PLL_M(x) ((x) << 4) -#define DSIM_PLL_S(x) ((x) << 1) +#define DSIM_PLL(x, offset) ((x) << (offset)) /* DSIM_PHYCTRL */ #define DSIM_PHYCTRL_ULPS_EXIT(x) (((x) & 0x1ff) << 0) @@ -222,25 +219,42 @@ #define DSI_XFER_TIMEOUT_MS 100 #define DSI_RX_FIFO_EMPTY 0x30800002 -#define OLD_SCLK_MIPI_CLK_NAME "pll_clk" - #define PS_TO_CYCLE(ps, hz) DIV64_U64_ROUND_CLOSEST(((ps) * (hz)), 1000000000000ULL) -static const char *const clk_names[5] = { - 
"bus_clk", - "sclk_mipi", - "phyclk_mipidphy0_bitclkdiv8", - "phyclk_mipidphy0_rxclkesc0", - "sclk_rgb_vclk_to_dsim0" -}; - enum samsung_dsim_transfer_type { EXYNOS_DSI_TX, EXYNOS_DSI_RX, }; +static struct clk_bulk_data exynos3_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "pll_clk" }, +}; + +static struct clk_bulk_data exynos4_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "sclk_mipi" }, +}; + +static struct clk_bulk_data exynos5433_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "sclk_mipi" }, + { .id = "phyclk_mipidphy0_bitclkdiv8" }, + { .id = "phyclk_mipidphy0_rxclkesc0" }, + { .id = "sclk_rgb_vclk_to_dsim0" }, +}; + +static struct clk_bulk_data exynos7870_clk_bulk_data[] = { + { .id = "bus" }, + { .id = "pll" }, + { .id = "byte" }, + { .id = "esc" }, +}; + enum reg_idx { - DSIM_STATUS_REG, /* Status register */ + DSIM_STATUS_REG, /* Status register (legacy) */ + DSIM_LINK_STATUS_REG, /* Link status register */ + DSIM_DPHY_STATUS_REG, /* D-PHY status register */ DSIM_SWRST_REG, /* Software reset register */ DSIM_CLKCTRL_REG, /* Clock control register */ DSIM_TIMEOUT_REG, /* Time out register */ @@ -255,6 +269,7 @@ enum reg_idx { DSIM_PKTHDR_REG, /* Packet Header FIFO register */ DSIM_PAYLOAD_REG, /* Payload FIFO register */ DSIM_RXFIFO_REG, /* Read FIFO register */ + DSIM_SFRCTRL_REG, /* SFR standby and shadow control register */ DSIM_FIFOCTRL_REG, /* FIFO status and control register */ DSIM_PLLCTRL_REG, /* PLL control register */ DSIM_PHYCTRL_REG, @@ -312,6 +327,32 @@ static const unsigned int exynos5433_reg_ofs[] = { [DSIM_PHYTIMING2_REG] = 0xBC, }; +static const unsigned int exynos7870_reg_ofs[] = { + [DSIM_LINK_STATUS_REG] = 0x04, + [DSIM_DPHY_STATUS_REG] = 0x08, + [DSIM_SWRST_REG] = 0x0C, + [DSIM_CLKCTRL_REG] = 0x10, + [DSIM_TIMEOUT_REG] = 0x14, + [DSIM_ESCMODE_REG] = 0x1C, + [DSIM_MDRESOL_REG] = 0x20, + [DSIM_MVPORCH_REG] = 0x24, + [DSIM_MHPORCH_REG] = 0x28, + [DSIM_MSYNC_REG] = 0x2C, + [DSIM_CONFIG_REG] = 0x30, + [DSIM_INTSRC_REG] = 0x34, + [DSIM_INTMSK_REG] = 0x38, + [DSIM_PKTHDR_REG] = 0x3C, + [DSIM_PAYLOAD_REG] = 0x40, + [DSIM_RXFIFO_REG] = 0x44, + [DSIM_SFRCTRL_REG] = 0x48, + [DSIM_FIFOCTRL_REG] = 0x4C, + [DSIM_PLLCTRL_REG] = 0x94, + [DSIM_PHYCTRL_REG] = 0xA4, + [DSIM_PHYTIMING_REG] = 0xB4, + [DSIM_PHYTIMING1_REG] = 0xB8, + [DSIM_PHYTIMING2_REG] = 0xBC, +}; + enum reg_value_idx { RESET_TYPE, PLL_TIMER, @@ -384,6 +425,24 @@ static const unsigned int exynos5433_reg_values[] = { [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c), }; +static const unsigned int exynos7870_reg_values[] = { + [RESET_TYPE] = DSIM_SWRST, + [PLL_TIMER] = 80000, + [STOP_STATE_CNT] = 0xa, + [PHYCTRL_ULPS_EXIT] = DSIM_PHYCTRL_ULPS_EXIT(0x177), + [PHYCTRL_VREG_LP] = 0, + [PHYCTRL_SLEW_UP] = 0, + [PHYTIMING_LPX] = DSIM_PHYTIMING_LPX(0x07), + [PHYTIMING_HS_EXIT] = DSIM_PHYTIMING_HS_EXIT(0x0c), + [PHYTIMING_CLK_PREPARE] = DSIM_PHYTIMING1_CLK_PREPARE(0x08), + [PHYTIMING_CLK_ZERO] = DSIM_PHYTIMING1_CLK_ZERO(0x2b), + [PHYTIMING_CLK_POST] = DSIM_PHYTIMING1_CLK_POST(0x0d), + [PHYTIMING_CLK_TRAIL] = DSIM_PHYTIMING1_CLK_TRAIL(0x09), + [PHYTIMING_HS_PREPARE] = DSIM_PHYTIMING2_HS_PREPARE(0x09), + [PHYTIMING_HS_ZERO] = DSIM_PHYTIMING2_HS_ZERO(0x0f), + [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c), +}; + static const unsigned int imx8mm_dsim_reg_values[] = { [RESET_TYPE] = DSIM_SWRST, [PLL_TIMER] = 500, @@ -405,13 +464,26 @@ static const unsigned int imx8mm_dsim_reg_values[] = { static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg 
= 0x50, + .has_legacy_status_reg = 1, .has_freqband = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -424,13 +496,26 @@ static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x50, + .has_legacy_status_reg = 1, .has_freqband = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos4_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -443,11 +528,24 @@ static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x58, - .num_clks = 2, + .has_legacy_status_reg = 1, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -459,12 +557,25 @@ static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 5, + .clk_data = exynos5433_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos5433_clk_bulk_data), .max_freq = 1500, + .wait_for_hdr_fifo = 1, .wait_for_reset = 0, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = exynos5433_reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -476,12 +587,25 @@ static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1500, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit 
= 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = exynos5422_reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -490,19 +614,62 @@ static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = { .min_freq = 500, }; +static const struct samsung_dsim_driver_data exynos7870_dsi_driver_data = { + .reg_ofs = exynos7870_reg_ofs, + .plltmr_reg = 0xa0, + .has_clklane_stop = 1, + .has_sfrctrl = 1, + .clk_data = exynos7870_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos7870_clk_bulk_data), + .max_freq = 1500, + .wait_for_hdr_fifo = 0, + .wait_for_reset = 1, + .num_bits_resol = 12, + .video_mode_bit = 18, + .pll_stable_bit = 24, + .esc_clken_bit = 16, + .byte_clken_bit = 17, + .tx_req_hsclk_bit = 20, + .lane_esc_clk_bit = 8, + .lane_esc_data_offset = 9, + .pll_p_offset = 13, + .pll_m_offset = 3, + .pll_s_offset = 0, + .main_vsa_offset = 16, + .reg_values = exynos7870_reg_values, + .pll_fin_min = 6, + .pll_fin_max = 12, + .m_min = 41, + .m_max = 125, + .min_freq = 500, +}; + static const struct samsung_dsim_driver_data imx8mm_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos4_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data), .max_freq = 2100, + .wait_for_hdr_fifo = 1, .wait_for_reset = 0, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, /* * Unlike Exynos, PLL_P(PMS_P) offset 14 is used in i.MX8M Mini/Nano/Plus * downstream driver - drivers/gpu/drm/bridge/sec-dsim.c */ .pll_p_offset = 14, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = imx8mm_dsim_reg_values, .pll_fin_min = 2, .pll_fin_max = 30, @@ -518,6 +685,7 @@ samsung_dsim_types[DSIM_TYPE_COUNT] = { [DSIM_TYPE_EXYNOS5410] = &exynos5_dsi_driver_data, [DSIM_TYPE_EXYNOS5422] = &exynos5422_dsi_driver_data, [DSIM_TYPE_EXYNOS5433] = &exynos5433_dsi_driver_data, + [DSIM_TYPE_EXYNOS7870] = &exynos7870_dsi_driver_data, [DSIM_TYPE_IMX8MM] = &imx8mm_dsi_driver_data, [DSIM_TYPE_IMX8MP] = &imx8mm_dsi_driver_data, }; @@ -653,8 +821,9 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, writel(driver_data->reg_values[PLL_TIMER], dsi->reg_base + driver_data->plltmr_reg); - reg = DSIM_PLL_EN | DSIM_PLL_P(p, driver_data->pll_p_offset) | - DSIM_PLL_M(m) | DSIM_PLL_S(s); + reg = DSIM_PLL_EN | DSIM_PLL(p, driver_data->pll_p_offset) + | DSIM_PLL(m, driver_data->pll_m_offset) + | DSIM_PLL(s, driver_data->pll_s_offset); if (driver_data->has_freqband) { static const unsigned long freq_bands[] = { @@ -682,14 +851,17 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, samsung_dsim_write(dsi, DSIM_PLLCTRL_REG, reg); - timeout = 1000; + timeout = 3000; do { if (timeout-- == 0) { dev_err(dsi->dev, "PLL failed to stabilize\n"); return 0; } - reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); - } while ((reg & DSIM_PLL_STABLE) == 0); + if (driver_data->has_legacy_status_reg) + reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + else + reg = samsung_dsim_read(dsi, DSIM_LINK_STATUS_REG); + } while ((reg & BIT(driver_data->pll_stable_bit)) == 0); dsi->hs_clock = fout; @@ -698,6 +870,7 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) { + const 
struct samsung_dsim_driver_data *driver_data = dsi->driver_data; unsigned long hs_clk, byte_clk, esc_clk, pix_clk; unsigned long esc_div; u32 reg; @@ -731,15 +904,17 @@ static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) hs_clk, byte_clk, esc_clk); reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG); - reg &= ~(DSIM_ESC_PRESCALER_MASK | DSIM_LANE_ESC_CLK_EN_CLK - | DSIM_LANE_ESC_CLK_EN_DATA_MASK | DSIM_PLL_BYPASS - | DSIM_BYTE_CLK_SRC_MASK); - reg |= DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN - | DSIM_ESC_PRESCALER(esc_div) - | DSIM_LANE_ESC_CLK_EN_CLK - | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1) - | DSIM_BYTE_CLK_SRC(0) - | DSIM_TX_REQUEST_HSCLK; + reg &= ~(DSIM_ESC_PRESCALER_MASK | BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset) + | DSIM_PLL_BYPASS + | DSIM_BYTE_CLK_SRC_MASK); + reg |= BIT(driver_data->esc_clken_bit) | BIT(driver_data->byte_clken_bit) + | DSIM_ESC_PRESCALER(esc_div) + | BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1, + driver_data->lane_esc_data_offset) + | DSIM_BYTE_CLK_SRC(0) + | BIT(driver_data->tx_req_hsclk_bit); samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg); return 0; @@ -843,11 +1018,14 @@ static void samsung_dsim_set_phy_ctrl(struct samsung_dsim *dsi) static void samsung_dsim_disable_clock(struct samsung_dsim *dsi) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; u32 reg; reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG); - reg &= ~(DSIM_LANE_ESC_CLK_EN_CLK | DSIM_LANE_ESC_CLK_EN_DATA_MASK - | DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN); + reg &= ~(BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset) + | BIT(driver_data->esc_clken_bit) + | BIT(driver_data->byte_clken_bit)); samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg); reg = samsung_dsim_read(dsi, DSIM_PLLCTRL_REG); @@ -891,7 +1069,7 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) * mode, otherwise it will support command mode. 
*/ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { - reg |= DSIM_VIDEO_MODE; + reg |= BIT(driver_data->video_mode_bit); /* * The user manual describes that following bits are ignored in @@ -962,7 +1140,10 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) return -EFAULT; } - reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + if (driver_data->has_legacy_status_reg) + reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + else + reg = samsung_dsim_read(dsi, DSIM_DPHY_STATUS_REG); if ((reg & DSIM_STOP_STATE_DAT(lanes_mask)) != DSIM_STOP_STATE_DAT(lanes_mask)) continue; @@ -983,6 +1164,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) { struct drm_display_mode *m = &dsi->mode; unsigned int num_bits_resol = dsi->driver_data->num_bits_resol; + unsigned int main_vsa_offset = dsi->driver_data->main_vsa_offset; u32 reg; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { @@ -1009,7 +1191,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) reg = DSIM_MAIN_HFP(hfp) | DSIM_MAIN_HBP(hbp); samsung_dsim_write(dsi, DSIM_MHPORCH_REG, reg); - reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start) + reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start, main_vsa_offset) | DSIM_MAIN_HSA(hsa); samsung_dsim_write(dsi, DSIM_MSYNC_REG, reg); } @@ -1023,6 +1205,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enable) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; u32 reg; reg = samsung_dsim_read(dsi, DSIM_MDRESOL_REG); @@ -1031,6 +1214,15 @@ static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enabl else reg &= ~DSIM_MAIN_STAND_BY; samsung_dsim_write(dsi, DSIM_MDRESOL_REG, reg); + + if (driver_data->has_sfrctrl) { + reg = samsung_dsim_read(dsi, DSIM_SFRCTRL_REG); + if (enable) + reg |= DSIM_SFR_CTRL_STAND_BY; + else + reg &= ~DSIM_SFR_CTRL_STAND_BY; + samsung_dsim_write(dsi, DSIM_SFRCTRL_REG, reg); + } } static int samsung_dsim_wait_for_hdr_fifo(struct samsung_dsim *dsi) @@ -1087,6 +1279,7 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, { struct device *dev = dsi->dev; struct mipi_dsi_packet *pkt = &xfer->packet; + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; const u8 *payload = pkt->payload + xfer->tx_done; u16 length = pkt->payload_length - xfer->tx_done; bool first = !xfer->tx_done; @@ -1127,9 +1320,11 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, return; reg = get_unaligned_le32(pkt->header); - if (samsung_dsim_wait_for_hdr_fifo(dsi)) { - dev_err(dev, "waiting for header FIFO timed out\n"); - return; + if (driver_data->wait_for_hdr_fifo) { + if (samsung_dsim_wait_for_hdr_fifo(dsi)) { + dev_err(dev, "waiting for header FIFO timed out\n"); + return; + } } if (NEQV(xfer->flags & MIPI_DSI_MSG_USE_LPM, @@ -1922,7 +2117,7 @@ int samsung_dsim_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct samsung_dsim *dsi; - int ret, i; + int ret; dsi = devm_drm_bridge_alloc(dev, struct samsung_dsim, bridge, &samsung_dsim_bridge_funcs); if (IS_ERR(dsi)) @@ -1946,23 +2141,11 @@ int samsung_dsim_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "failed to get regulators\n"); - dsi->clks = devm_kcalloc(dev, dsi->driver_data->num_clks, - sizeof(*dsi->clks), GFP_KERNEL); - if (!dsi->clks) - return -ENOMEM; - - for (i = 0; i < dsi->driver_data->num_clks; i++) { - dsi->clks[i] = devm_clk_get(dev, clk_names[i]); - if (IS_ERR(dsi->clks[i])) { - 
if (strcmp(clk_names[i], "sclk_mipi") == 0) { - dsi->clks[i] = devm_clk_get(dev, OLD_SCLK_MIPI_CLK_NAME); - if (!IS_ERR(dsi->clks[i])) - continue; - } - - dev_info(dev, "failed to get the clock: %s\n", clk_names[i]); - return PTR_ERR(dsi->clks[i]); - } + ret = devm_clk_bulk_get(dev, dsi->driver_data->num_clks, + dsi->driver_data->clk_data); + if (ret) { + dev_err(dev, "failed to get clocks in bulk (%d)\n", ret); + return ret; } dsi->reg_base = devm_platform_ioremap_resource(pdev, 0); @@ -2035,7 +2218,7 @@ static int samsung_dsim_suspend(struct device *dev) { struct samsung_dsim *dsi = dev_get_drvdata(dev); const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; - int ret, i; + int ret; usleep_range(10000, 20000); @@ -2051,8 +2234,7 @@ static int samsung_dsim_suspend(struct device *dev) phy_power_off(dsi->phy); - for (i = driver_data->num_clks - 1; i > -1; i--) - clk_disable_unprepare(dsi->clks[i]); + clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data); ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret < 0) @@ -2065,7 +2247,7 @@ static int samsung_dsim_resume(struct device *dev) { struct samsung_dsim *dsi = dev_get_drvdata(dev); const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; - int ret, i; + int ret; ret = regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret < 0) { @@ -2073,11 +2255,9 @@ static int samsung_dsim_resume(struct device *dev) return ret; } - for (i = 0; i < driver_data->num_clks; i++) { - ret = clk_prepare_enable(dsi->clks[i]); - if (ret < 0) - goto err_clk; - } + ret = clk_bulk_prepare_enable(driver_data->num_clks, driver_data->clk_data); + if (ret < 0) + goto err_clk; ret = phy_power_on(dsi->phy); if (ret < 0) { @@ -2088,8 +2268,7 @@ static int samsung_dsim_resume(struct device *dev) return 0; err_clk: - while (--i > -1) - clk_disable_unprepare(dsi->clks[i]); + clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data); regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); return ret; diff --git a/drivers/gpu/drm/bridge/simple-bridge.c b/drivers/gpu/drm/bridge/simple-bridge.c index 3d15ddd39470..e4d0bc2200f8 100644 --- a/drivers/gpu/drm/bridge/simple-bridge.c +++ b/drivers/gpu/drm/bridge/simple-bridge.c @@ -262,6 +262,16 @@ static const struct of_device_id simple_bridge_match[] = { .connector_type = DRM_MODE_CONNECTOR_VGA, }, }, { + .compatible = "radxa,ra620", + .data = &(const struct simple_bridge_info) { + .connector_type = DRM_MODE_CONNECTOR_HDMIA, + }, + }, { + .compatible = "realtek,rtd2171", + .data = &(const struct simple_bridge_info) { + .connector_type = DRM_MODE_CONNECTOR_HDMIA, + }, + }, { .compatible = "ti,opa362", .data = &(const struct simple_bridge_info) { .connector_type = DRM_MODE_CONNECTOR_Composite, diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index f3ab2f985f8c..2c5e532410de 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -1,4 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only +config DRM_DW_DP + tristate + select DRM_DISPLAY_HELPER + select DRM_DISPLAY_DP_HELPER + select DRM_KMS_HELPER + select REGMAP_MMIO + config DRM_DW_HDMI tristate select DRM_DISPLAY_HDMI_HELPER diff --git a/drivers/gpu/drm/bridge/synopsys/Makefile b/drivers/gpu/drm/bridge/synopsys/Makefile index 9dc376d220ad..4dada44029ac 100644 --- a/drivers/gpu/drm/bridge/synopsys/Makefile +++ b/drivers/gpu/drm/bridge/synopsys/Makefile @@ -1,4 +1,5 @@ # 
SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_DRM_DW_DP) += dw-dp.o obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o obj-$(CONFIG_DRM_DW_HDMI_GP_AUDIO) += dw-hdmi-gp-audio.o diff --git a/drivers/gpu/drm/bridge/synopsys/dw-dp.c b/drivers/gpu/drm/bridge/synopsys/dw-dp.c new file mode 100644 index 000000000000..9bbfe8da3de0 --- /dev/null +++ b/drivers/gpu/drm/bridge/synopsys/dw-dp.c @@ -0,0 +1,2095 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Synopsys DesignWare Cores DisplayPort Transmitter Controller + * + * Copyright (c) 2025 Rockchip Electronics Co., Ltd. + * + * Author: Andy Yan <andy.yan@rock-chips.com> + */ +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/iopoll.h> +#include <linux/irq.h> +#include <linux/media-bus-format.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <linux/phy/phy.h> +#include <linux/unaligned.h> + +#include <drm/bridge/dw_dp.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_bridge_connector.h> +#include <drm/display/drm_dp_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_of.h> +#include <drm/drm_print.h> +#include <drm/drm_probe_helper.h> +#include <drm/drm_simple_kms_helper.h> + +#define DW_DP_VERSION_NUMBER 0x0000 +#define DW_DP_VERSION_TYPE 0x0004 +#define DW_DP_ID 0x0008 + +#define DW_DP_CONFIG_REG1 0x0100 +#define DW_DP_CONFIG_REG2 0x0104 +#define DW_DP_CONFIG_REG3 0x0108 + +#define DW_DP_CCTL 0x0200 +#define FORCE_HPD BIT(4) +#define DEFAULT_FAST_LINK_TRAIN_EN BIT(2) +#define ENHANCE_FRAMING_EN BIT(1) +#define SCRAMBLE_DIS BIT(0) +#define DW_DP_SOFT_RESET_CTRL 0x0204 +#define VIDEO_RESET BIT(5) +#define AUX_RESET BIT(4) +#define AUDIO_SAMPLER_RESET BIT(3) +#define HDCP_MODULE_RESET BIT(2) +#define PHY_SOFT_RESET BIT(1) +#define CONTROLLER_RESET BIT(0) + +#define DW_DP_VSAMPLE_CTRL 0x0300 +#define PIXEL_MODE_SELECT GENMASK(22, 21) +#define VIDEO_MAPPING GENMASK(20, 16) +#define VIDEO_STREAM_ENABLE BIT(5) + +#define DW_DP_VSAMPLE_STUFF_CTRL1 0x0304 + +#define DW_DP_VSAMPLE_STUFF_CTRL2 0x0308 + +#define DW_DP_VINPUT_POLARITY_CTRL 0x030c +#define DE_IN_POLARITY BIT(2) +#define HSYNC_IN_POLARITY BIT(1) +#define VSYNC_IN_POLARITY BIT(0) + +#define DW_DP_VIDEO_CONFIG1 0x0310 +#define HACTIVE GENMASK(31, 16) +#define HBLANK GENMASK(15, 2) +#define I_P BIT(1) +#define R_V_BLANK_IN_OSC BIT(0) + +#define DW_DP_VIDEO_CONFIG2 0x0314 +#define VBLANK GENMASK(31, 16) +#define VACTIVE GENMASK(15, 0) + +#define DW_DP_VIDEO_CONFIG3 0x0318 +#define H_SYNC_WIDTH GENMASK(31, 16) +#define H_FRONT_PORCH GENMASK(15, 0) + +#define DW_DP_VIDEO_CONFIG4 0x031c +#define V_SYNC_WIDTH GENMASK(31, 16) +#define V_FRONT_PORCH GENMASK(15, 0) + +#define DW_DP_VIDEO_CONFIG5 0x0320 +#define INIT_THRESHOLD_HI GENMASK(22, 21) +#define AVERAGE_BYTES_PER_TU_FRAC GENMASK(19, 16) +#define INIT_THRESHOLD GENMASK(13, 7) +#define AVERAGE_BYTES_PER_TU GENMASK(6, 0) + +#define DW_DP_VIDEO_MSA1 0x0324 +#define VSTART GENMASK(31, 16) +#define HSTART GENMASK(15, 0) + +#define DW_DP_VIDEO_MSA2 0x0328 +#define MISC0 GENMASK(31, 24) + +#define DW_DP_VIDEO_MSA3 0x032c +#define MISC1 GENMASK(31, 24) + +#define DW_DP_VIDEO_HBLANK_INTERVAL 0x0330 +#define HBLANK_INTERVAL_EN BIT(16) +#define HBLANK_INTERVAL GENMASK(15, 0) + +#define DW_DP_AUD_CONFIG1 0x0400 +#define AUDIO_TIMESTAMP_VERSION_NUM GENMASK(29, 24) +#define AUDIO_PACKET_ID GENMASK(23, 16) +#define AUDIO_MUTE BIT(15) +#define NUM_CHANNELS GENMASK(14, 12) 
+#define HBR_MODE_ENABLE BIT(10) +#define AUDIO_DATA_WIDTH GENMASK(9, 5) +#define AUDIO_DATA_IN_EN GENMASK(4, 1) +#define AUDIO_INF_SELECT BIT(0) + +#define DW_DP_SDP_VERTICAL_CTRL 0x0500 +#define EN_VERTICAL_SDP BIT(2) +#define EN_AUDIO_STREAM_SDP BIT(1) +#define EN_AUDIO_TIMESTAMP_SDP BIT(0) +#define DW_DP_SDP_HORIZONTAL_CTRL 0x0504 +#define EN_HORIZONTAL_SDP BIT(2) +#define DW_DP_SDP_STATUS_REGISTER 0x0508 +#define DW_DP_SDP_MANUAL_CTRL 0x050c +#define DW_DP_SDP_STATUS_EN 0x0510 + +#define DW_DP_SDP_REGISTER_BANK 0x0600 +#define SDP_REGS GENMASK(31, 0) + +#define DW_DP_PHYIF_CTRL 0x0a00 +#define PHY_WIDTH BIT(25) +#define PHY_POWERDOWN GENMASK(20, 17) +#define PHY_BUSY GENMASK(15, 12) +#define SSC_DIS BIT(16) +#define XMIT_ENABLE GENMASK(11, 8) +#define PHY_LANES GENMASK(7, 6) +#define PHY_RATE GENMASK(5, 4) +#define TPS_SEL GENMASK(3, 0) + +#define DW_DP_PHY_TX_EQ 0x0a04 +#define DW_DP_CUSTOMPAT0 0x0a08 +#define DW_DP_CUSTOMPAT1 0x0a0c +#define DW_DP_CUSTOMPAT2 0x0a10 +#define DW_DP_HBR2_COMPLIANCE_SCRAMBLER_RESET 0x0a14 +#define DW_DP_PHYIF_PWRDOWN_CTRL 0x0a18 + +#define DW_DP_AUX_CMD 0x0b00 +#define AUX_CMD_TYPE GENMASK(31, 28) +#define AUX_ADDR GENMASK(27, 8) +#define I2C_ADDR_ONLY BIT(4) +#define AUX_LEN_REQ GENMASK(3, 0) + +#define DW_DP_AUX_STATUS 0x0b04 +#define AUX_TIMEOUT BIT(17) +#define AUX_BYTES_READ GENMASK(23, 19) +#define AUX_STATUS GENMASK(7, 4) + +#define DW_DP_AUX_DATA0 0x0b08 +#define DW_DP_AUX_DATA1 0x0b0c +#define DW_DP_AUX_DATA2 0x0b10 +#define DW_DP_AUX_DATA3 0x0b14 + +#define DW_DP_GENERAL_INTERRUPT 0x0d00 +#define VIDEO_FIFO_OVERFLOW_STREAM0 BIT(6) +#define AUDIO_FIFO_OVERFLOW_STREAM0 BIT(5) +#define SDP_EVENT_STREAM0 BIT(4) +#define AUX_CMD_INVALID BIT(3) +#define HDCP_EVENT BIT(2) +#define AUX_REPLY_EVENT BIT(1) +#define HPD_EVENT BIT(0) + +#define DW_DP_GENERAL_INTERRUPT_ENABLE 0x0d04 +#define HDCP_EVENT_EN BIT(2) +#define AUX_REPLY_EVENT_EN BIT(1) +#define HPD_EVENT_EN BIT(0) + +#define DW_DP_HPD_STATUS 0x0d08 +#define HPD_STATE GENMASK(11, 9) +#define HPD_STATUS BIT(8) +#define HPD_HOT_UNPLUG BIT(2) +#define HPD_HOT_PLUG BIT(1) +#define HPD_IRQ BIT(0) + +#define DW_DP_HPD_INTERRUPT_ENABLE 0x0d0c +#define HPD_UNPLUG_ERR_EN BIT(3) +#define HPD_UNPLUG_EN BIT(2) +#define HPD_PLUG_EN BIT(1) +#define HPD_IRQ_EN BIT(0) + +#define DW_DP_HDCP_CFG 0x0e00 +#define DPCD12PLUS BIT(7) +#define CP_IRQ BIT(6) +#define BYPENCRYPTION BIT(5) +#define HDCP_LOCK BIT(4) +#define ENCRYPTIONDISABLE BIT(3) +#define ENABLE_HDCP_13 BIT(2) +#define ENABLE_HDCP BIT(1) + +#define DW_DP_HDCP_OBS 0x0e04 +#define HDCP22_RE_AUTHENTICATION_REQ BIT(31) +#define HDCP22_AUTHENTICATION_FAILED BIT(30) +#define HDCP22_AUTHENTICATION_SUCCESS BIT(29) +#define HDCP22_CAPABLE_SINK BIT(28) +#define HDCP22_SINK_CAP_CHECK_COMPLETE BIT(27) +#define HDCP22_STATE GENMASK(26, 24) +#define HDCP22_BOOTED BIT(23) +#define HDCP13_BSTATUS GENMASK(22, 19) +#define REPEATER BIT(18) +#define HDCP_CAPABLE BIT(17) +#define STATEE GENMASK(16, 14) +#define STATEOEG GENMASK(13, 11) +#define STATER GENMASK(10, 8) +#define STATEA GENMASK(7, 4) +#define SUBSTATEA GENMASK(3, 1) +#define HDCPENGAGED BIT(0) + +#define DW_DP_HDCP_APIINTCLR 0x0e08 +#define DW_DP_HDCP_APIINTSTAT 0x0e0c +#define DW_DP_HDCP_APIINTMSK 0x0e10 +#define HDCP22_GPIOINT BIT(8) +#define HDCP_ENGAGED BIT(7) +#define HDCP_FAILED BIT(6) +#define KSVSHA1CALCDONEINT BIT(5) +#define AUXRESPNACK7TIMES BIT(4) +#define AUXRESPTIMEOUT BIT(3) +#define AUXRESPDEFER7TIMES BIT(2) +#define KSVACCESSINT BIT(0) + +#define DW_DP_HDCP_KSVMEMCTRL 0x0e18 +#define 
KSVSHA1STATUS BIT(4) +#define KSVMEMACCESS BIT(1) +#define KSVMEMREQUEST BIT(0) + +#define DW_DP_HDCP_REG_BKSV0 0x3600 +#define DW_DP_HDCP_REG_BKSV1 0x3604 +#define DW_DP_HDCP_REG_ANCONF 0x3608 +#define AN_BYPASS BIT(0) + +#define DW_DP_HDCP_REG_AN0 0x360c +#define DW_DP_HDCP_REG_AN1 0x3610 +#define DW_DP_HDCP_REG_RMLCTL 0x3614 +#define ODPK_DECRYPT_ENABLE BIT(0) + +#define DW_DP_HDCP_REG_RMLSTS 0x3618 +#define IDPK_WR_OK_STS BIT(6) +#define IDPK_DATA_INDEX GENMASK(5, 0) +#define DW_DP_HDCP_REG_SEED 0x361c +#define DW_DP_HDCP_REG_DPK0 0x3620 +#define DW_DP_HDCP_REG_DPK1 0x3624 +#define DW_DP_HDCP22_GPIOSTS 0x3628 +#define DW_DP_HDCP22_GPIOCHNGSTS 0x362c +#define DW_DP_HDCP_REG_DPK_CRC 0x3630 + +#define DW_DP_MAX_REGISTER DW_DP_HDCP_REG_DPK_CRC + +#define SDP_REG_BANK_SIZE 16 + +struct dw_dp_link_caps { + bool enhanced_framing; + bool tps3_supported; + bool tps4_supported; + bool fast_training; + bool channel_coding; + bool ssc; +}; + +struct dw_dp_link_train_set { + unsigned int voltage_swing[4]; + unsigned int pre_emphasis[4]; + bool voltage_max_reached[4]; + bool pre_max_reached[4]; +}; + +struct dw_dp_link_train { + struct dw_dp_link_train_set adjust; + bool clock_recovered; + bool channel_equalized; +}; + +struct dw_dp_link { + u8 dpcd[DP_RECEIVER_CAP_SIZE]; + unsigned char revision; + unsigned int rate; + unsigned int lanes; + u8 sink_count; + u8 vsc_sdp_supported; + struct dw_dp_link_caps caps; + struct dw_dp_link_train train; + struct drm_dp_desc desc; +}; + +struct dw_dp_bridge_state { + struct drm_bridge_state base; + struct drm_display_mode mode; + u8 video_mapping; + u8 color_format; + u8 bpc; + u8 bpp; +}; + +struct dw_dp_sdp { + struct dp_sdp base; + unsigned long flags; +}; + +struct dw_dp_hotplug { + bool long_hpd; +}; + +struct dw_dp { + struct drm_bridge bridge; + struct device *dev; + struct regmap *regmap; + struct phy *phy; + struct clk *apb_clk; + struct clk *aux_clk; + struct clk *i2s_clk; + struct clk *spdif_clk; + struct clk *hdcp_clk; + struct reset_control *rstc; + struct completion complete; + int irq; + struct work_struct hpd_work; + struct dw_dp_hotplug hotplug; + /* Serialize hpd status access */ + struct mutex irq_lock; + + struct drm_dp_aux aux; + + struct dw_dp_link link; + struct dw_dp_plat_data plat_data; + u8 pixel_mode; + + DECLARE_BITMAP(sdp_reg_bank, SDP_REG_BANK_SIZE); +}; + +enum { + DW_DP_RGB_6BIT, + DW_DP_RGB_8BIT, + DW_DP_RGB_10BIT, + DW_DP_RGB_12BIT, + DW_DP_RGB_16BIT, + DW_DP_YCBCR444_8BIT, + DW_DP_YCBCR444_10BIT, + DW_DP_YCBCR444_12BIT, + DW_DP_YCBCR444_16BIT, + DW_DP_YCBCR422_8BIT, + DW_DP_YCBCR422_10BIT, + DW_DP_YCBCR422_12BIT, + DW_DP_YCBCR422_16BIT, + DW_DP_YCBCR420_8BIT, + DW_DP_YCBCR420_10BIT, + DW_DP_YCBCR420_12BIT, + DW_DP_YCBCR420_16BIT, +}; + +enum { + DW_DP_MP_SINGLE_PIXEL, + DW_DP_MP_DUAL_PIXEL, + DW_DP_MP_QUAD_PIXEL, +}; + +enum { + DW_DP_SDP_VERTICAL_INTERVAL = BIT(0), + DW_DP_SDP_HORIZONTAL_INTERVAL = BIT(1), +}; + +enum { + DW_DP_HPD_STATE_IDLE, + DW_DP_HPD_STATE_UNPLUG, + DP_DP_HPD_STATE_TIMEOUT = 4, + DW_DP_HPD_STATE_PLUG = 7 +}; + +enum { + DW_DP_PHY_PATTERN_NONE, + DW_DP_PHY_PATTERN_TPS_1, + DW_DP_PHY_PATTERN_TPS_2, + DW_DP_PHY_PATTERN_TPS_3, + DW_DP_PHY_PATTERN_TPS_4, + DW_DP_PHY_PATTERN_SERM, + DW_DP_PHY_PATTERN_PBRS7, + DW_DP_PHY_PATTERN_CUSTOM_80BIT, + DW_DP_PHY_PATTERN_CP2520_1, + DW_DP_PHY_PATTERN_CP2520_2, +}; + +struct dw_dp_output_format { + u32 bus_format; + u32 color_format; + u8 video_mapping; + u8 bpc; + u8 bpp; +}; + +#define to_dw_dp_bridge_state(s) container_of(s, struct dw_dp_bridge_state, base) + +static 
const struct dw_dp_output_format dw_dp_output_formats[] = { + { MEDIA_BUS_FMT_RGB101010_1X30, DRM_COLOR_FORMAT_RGB444, DW_DP_RGB_10BIT, 10, 30 }, + { MEDIA_BUS_FMT_RGB888_1X24, DRM_COLOR_FORMAT_RGB444, DW_DP_RGB_8BIT, 8, 24 }, + { MEDIA_BUS_FMT_YUV10_1X30, DRM_COLOR_FORMAT_YCBCR444, DW_DP_YCBCR444_10BIT, 10, 30 }, + { MEDIA_BUS_FMT_YUV8_1X24, DRM_COLOR_FORMAT_YCBCR444, DW_DP_YCBCR444_8BIT, 8, 24}, + { MEDIA_BUS_FMT_YUYV10_1X20, DRM_COLOR_FORMAT_YCBCR422, DW_DP_YCBCR422_10BIT, 10, 20 }, + { MEDIA_BUS_FMT_YUYV8_1X16, DRM_COLOR_FORMAT_YCBCR422, DW_DP_YCBCR422_8BIT, 8, 16 }, + { MEDIA_BUS_FMT_UYYVYY10_0_5X30, DRM_COLOR_FORMAT_YCBCR420, DW_DP_YCBCR420_10BIT, 10, 15 }, + { MEDIA_BUS_FMT_UYYVYY8_0_5X24, DRM_COLOR_FORMAT_YCBCR420, DW_DP_YCBCR420_8BIT, 8, 12 }, + { MEDIA_BUS_FMT_RGB666_1X24_CPADHI, DRM_COLOR_FORMAT_RGB444, DW_DP_RGB_6BIT, 6, 18 }, +}; + +static const struct dw_dp_output_format *dw_dp_get_output_format(u32 bus_format) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dw_dp_output_formats); i++) + if (dw_dp_output_formats[i].bus_format == bus_format) + return &dw_dp_output_formats[i]; + + return NULL; +} + +static inline struct dw_dp *bridge_to_dp(struct drm_bridge *b) +{ + return container_of(b, struct dw_dp, bridge); +} + +static struct dw_dp_bridge_state *dw_dp_get_bridge_state(struct dw_dp *dp) +{ + struct dw_dp_bridge_state *dw_bridge_state; + struct drm_bridge_state *state; + + state = drm_priv_to_bridge_state(dp->bridge.base.state); + if (!state) + return NULL; + + dw_bridge_state = to_dw_dp_bridge_state(state); + if (!dw_bridge_state) + return NULL; + + return dw_bridge_state; +} + +static inline void dw_dp_phy_set_pattern(struct dw_dp *dp, u32 pattern) +{ + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, TPS_SEL, + FIELD_PREP(TPS_SEL, pattern)); +} + +static void dw_dp_phy_xmit_enable(struct dw_dp *dp, u32 lanes) +{ + u32 xmit_enable; + + switch (lanes) { + case 4: + case 2: + case 1: + xmit_enable = GENMASK(lanes - 1, 0); + break; + case 0: + default: + xmit_enable = 0; + break; + } + + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, XMIT_ENABLE, + FIELD_PREP(XMIT_ENABLE, xmit_enable)); +} + +static bool dw_dp_bandwidth_ok(struct dw_dp *dp, + const struct drm_display_mode *mode, u32 bpp, + unsigned int lanes, unsigned int rate) +{ + u32 max_bw, req_bw; + + req_bw = mode->clock * bpp / 8; + max_bw = lanes * rate; + if (req_bw > max_bw) + return false; + + return true; +} + +static bool dw_dp_hpd_detect(struct dw_dp *dp) +{ + u32 value; + + regmap_read(dp->regmap, DW_DP_HPD_STATUS, &value); + + return FIELD_GET(HPD_STATE, value) == DW_DP_HPD_STATE_PLUG; +} + +static void dw_dp_link_caps_reset(struct dw_dp_link_caps *caps) +{ + caps->enhanced_framing = false; + caps->tps3_supported = false; + caps->tps4_supported = false; + caps->fast_training = false; + caps->channel_coding = false; +} + +static void dw_dp_link_reset(struct dw_dp_link *link) +{ + link->vsc_sdp_supported = 0; + link->sink_count = 0; + link->revision = 0; + link->rate = 0; + link->lanes = 0; + + dw_dp_link_caps_reset(&link->caps); + memset(link->dpcd, 0, sizeof(link->dpcd)); +} + +static int dw_dp_link_parse(struct dw_dp *dp, struct drm_connector *connector) +{ + struct dw_dp_link *link = &dp->link; + int ret; + + dw_dp_link_reset(link); + + ret = drm_dp_read_dpcd_caps(&dp->aux, link->dpcd); + if (ret < 0) + return ret; + + drm_dp_read_desc(&dp->aux, &link->desc, drm_dp_is_branch(link->dpcd)); + + if (drm_dp_read_sink_count_cap(connector, link->dpcd, &link->desc)) { + ret = drm_dp_read_sink_count(&dp->aux); 
+ if (ret < 0) + return ret; + + link->sink_count = ret; + + /* Dongle connected, but no display */ + if (!link->sink_count) + return -ENODEV; + } + + link->vsc_sdp_supported = drm_dp_vsc_sdp_supported(&dp->aux, link->dpcd); + + link->revision = link->dpcd[DP_DPCD_REV]; + link->rate = min_t(u32, min(dp->plat_data.max_link_rate, + dp->phy->attrs.max_link_rate * 100), + drm_dp_max_link_rate(link->dpcd)); + link->lanes = min_t(u8, phy_get_bus_width(dp->phy), + drm_dp_max_lane_count(link->dpcd)); + + link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(link->dpcd); + link->caps.tps3_supported = drm_dp_tps3_supported(link->dpcd); + link->caps.tps4_supported = drm_dp_tps4_supported(link->dpcd); + link->caps.fast_training = drm_dp_fast_training_cap(link->dpcd); + link->caps.channel_coding = drm_dp_channel_coding_supported(link->dpcd); + link->caps.ssc = !!(link->dpcd[DP_MAX_DOWNSPREAD] & DP_MAX_DOWNSPREAD_0_5); + + return 0; +} + +static int dw_dp_link_train_update_vs_emph(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + struct dw_dp_link_train_set *train_set = &link->train.adjust; + unsigned int lanes = dp->link.lanes; + union phy_configure_opts phy_cfg; + unsigned int *vs, *pe; + int i, ret; + u8 buf[4]; + + vs = train_set->voltage_swing; + pe = train_set->pre_emphasis; + + for (i = 0; i < lanes; i++) { + phy_cfg.dp.voltage[i] = vs[i]; + phy_cfg.dp.pre[i] = pe[i]; + } + + phy_cfg.dp.set_lanes = false; + phy_cfg.dp.set_rate = false; + phy_cfg.dp.set_voltages = true; + + ret = phy_configure(dp->phy, &phy_cfg); + if (ret) + return ret; + + for (i = 0; i < lanes; i++) { + buf[i] = (vs[i] << DP_TRAIN_VOLTAGE_SWING_SHIFT) | + (pe[i] << DP_TRAIN_PRE_EMPHASIS_SHIFT); + if (train_set->voltage_max_reached[i]) + buf[i] |= DP_TRAIN_MAX_SWING_REACHED; + if (train_set->pre_max_reached[i]) + buf[i] |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED; + } + + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, buf, lanes); + if (ret < 0) + return ret; + + return 0; +} + +static int dw_dp_phy_configure(struct dw_dp *dp, unsigned int rate, + unsigned int lanes, bool ssc) +{ + union phy_configure_opts phy_cfg; + int ret; + + /* Move PHY to P3 */ + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, PHY_POWERDOWN, + FIELD_PREP(PHY_POWERDOWN, 0x3)); + + phy_cfg.dp.lanes = lanes; + phy_cfg.dp.link_rate = rate / 100; + phy_cfg.dp.ssc = ssc; + phy_cfg.dp.set_lanes = true; + phy_cfg.dp.set_rate = true; + phy_cfg.dp.set_voltages = false; + ret = phy_configure(dp->phy, &phy_cfg); + if (ret) + return ret; + + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, PHY_LANES, + FIELD_PREP(PHY_LANES, lanes / 2)); + + /* Move PHY to P0 */ + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, PHY_POWERDOWN, + FIELD_PREP(PHY_POWERDOWN, 0x0)); + + dw_dp_phy_xmit_enable(dp, lanes); + + return 0; +} + +static int dw_dp_link_configure(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + u8 buf[2]; + int ret; + + ret = dw_dp_phy_configure(dp, link->rate, link->lanes, link->caps.ssc); + if (ret) + return ret; + + buf[0] = drm_dp_link_rate_to_bw_code(link->rate); + buf[1] = link->lanes; + + if (link->caps.enhanced_framing) { + buf[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; + regmap_update_bits(dp->regmap, DW_DP_CCTL, ENHANCE_FRAMING_EN, + FIELD_PREP(ENHANCE_FRAMING_EN, 1)); + } else { + regmap_update_bits(dp->regmap, DW_DP_CCTL, ENHANCE_FRAMING_EN, + FIELD_PREP(ENHANCE_FRAMING_EN, 0)); + } + + ret = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, sizeof(buf)); + if (ret < 0) + return ret; + + buf[0] = link->caps.ssc ? 
DP_SPREAD_AMP_0_5 : 0; + buf[1] = link->caps.channel_coding ? DP_SET_ANSI_8B10B : 0; + + ret = drm_dp_dpcd_write(&dp->aux, DP_DOWNSPREAD_CTRL, buf, sizeof(buf)); + if (ret < 0) + return ret; + + return 0; +} + +static void dw_dp_link_train_init(struct dw_dp_link_train *train) +{ + struct dw_dp_link_train_set *adj = &train->adjust; + unsigned int i; + + for (i = 0; i < 4; i++) { + adj->voltage_swing[i] = 0; + adj->pre_emphasis[i] = 0; + adj->voltage_max_reached[i] = false; + adj->pre_max_reached[i] = false; + } + + train->clock_recovered = false; + train->channel_equalized = false; +} + +static bool dw_dp_link_train_valid(const struct dw_dp_link_train *train) +{ + return train->clock_recovered && train->channel_equalized; +} + +static int dw_dp_link_train_set_pattern(struct dw_dp *dp, u32 pattern) +{ + u8 buf = 0; + int ret; + + if (pattern && pattern != DP_TRAINING_PATTERN_4) { + buf |= DP_LINK_SCRAMBLING_DISABLE; + + regmap_update_bits(dp->regmap, DW_DP_CCTL, SCRAMBLE_DIS, + FIELD_PREP(SCRAMBLE_DIS, 1)); + } else { + regmap_update_bits(dp->regmap, DW_DP_CCTL, SCRAMBLE_DIS, + FIELD_PREP(SCRAMBLE_DIS, 0)); + } + + switch (pattern) { + case DP_TRAINING_PATTERN_DISABLE: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_NONE); + break; + case DP_TRAINING_PATTERN_1: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_TPS_1); + break; + case DP_TRAINING_PATTERN_2: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_TPS_2); + break; + case DP_TRAINING_PATTERN_3: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_TPS_3); + break; + case DP_TRAINING_PATTERN_4: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_TPS_4); + break; + default: + return -EINVAL; + } + + ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + buf | pattern); + if (ret < 0) + return ret; + + return 0; +} + +static u8 dw_dp_voltage_max(u8 preemph) +{ + switch (preemph & DP_TRAIN_PRE_EMPHASIS_MASK) { + case DP_TRAIN_PRE_EMPH_LEVEL_0: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; + case DP_TRAIN_PRE_EMPH_LEVEL_1: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + case DP_TRAIN_PRE_EMPH_LEVEL_2: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_1; + case DP_TRAIN_PRE_EMPH_LEVEL_3: + default: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_0; + } +} + +static bool dw_dp_link_get_adjustments(struct dw_dp_link *link, + u8 status[DP_LINK_STATUS_SIZE]) +{ + struct dw_dp_link_train_set *adj = &link->train.adjust; + unsigned int i; + bool changed = false; + u8 v = 0; + u8 p = 0; + + for (i = 0; i < link->lanes; i++) { + v = drm_dp_get_adjust_request_voltage(status, i); + v >>= DP_TRAIN_VOLTAGE_SWING_SHIFT; + p = drm_dp_get_adjust_request_pre_emphasis(status, i); + p >>= DP_TRAIN_PRE_EMPHASIS_SHIFT; + + if (v != adj->voltage_swing[i] || p != adj->pre_emphasis[i]) + changed = true; + + if (p >= (DP_TRAIN_PRE_EMPH_LEVEL_3 >> DP_TRAIN_PRE_EMPHASIS_SHIFT)) { + adj->pre_emphasis[i] = DP_TRAIN_PRE_EMPH_LEVEL_3 >> + DP_TRAIN_PRE_EMPHASIS_SHIFT; + adj->pre_max_reached[i] = true; + } else { + adj->pre_emphasis[i] = p; + adj->pre_max_reached[i] = false; + } + + v = min(v, dw_dp_voltage_max(p)); + if (v >= (DP_TRAIN_VOLTAGE_SWING_LEVEL_3 >> DP_TRAIN_VOLTAGE_SWING_SHIFT)) { + adj->voltage_swing[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL_3 >> + DP_TRAIN_VOLTAGE_SWING_SHIFT; + adj->voltage_max_reached[i] = true; + } else { + adj->voltage_swing[i] = v; + adj->voltage_max_reached[i] = false; + } + } + + return changed; +} + +static int dw_dp_link_clock_recovery(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + u8 status[DP_LINK_STATUS_SIZE]; + unsigned int tries = 0; + int ret; + bool adj_changed; + + 
ret = dw_dp_link_train_set_pattern(dp, DP_TRAINING_PATTERN_1); + if (ret) + return ret; + + for (;;) { + ret = dw_dp_link_train_update_vs_emph(dp); + if (ret) + return ret; + + drm_dp_link_train_clock_recovery_delay(&dp->aux, link->dpcd); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, status); + if (ret < 0) { + dev_err(dp->dev, "failed to read link status: %d\n", ret); + return ret; + } + + if (drm_dp_clock_recovery_ok(status, link->lanes)) { + link->train.clock_recovered = true; + break; + } + + /* + * According to DP spec 1.4, if current ADJ is the same + * with previous REQ, we need to retry 5 times. + */ + adj_changed = dw_dp_link_get_adjustments(link, status); + if (!adj_changed) + tries++; + else + tries = 0; + + if (tries == 5) + break; + } + + return 0; +} + +static int dw_dp_link_channel_equalization(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + u8 status[DP_LINK_STATUS_SIZE], pattern; + unsigned int tries; + int ret; + + if (link->caps.tps4_supported) + pattern = DP_TRAINING_PATTERN_4; + else if (link->caps.tps3_supported) + pattern = DP_TRAINING_PATTERN_3; + else + pattern = DP_TRAINING_PATTERN_2; + ret = dw_dp_link_train_set_pattern(dp, pattern); + if (ret) + return ret; + + for (tries = 1; tries < 5; tries++) { + ret = dw_dp_link_train_update_vs_emph(dp); + if (ret) + return ret; + + drm_dp_link_train_channel_eq_delay(&dp->aux, link->dpcd); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, status); + if (ret < 0) + return ret; + + if (!drm_dp_clock_recovery_ok(status, link->lanes)) { + dev_err(dp->dev, "clock recovery lost while equalizing channel\n"); + link->train.clock_recovered = false; + break; + } + + if (drm_dp_channel_eq_ok(status, link->lanes)) { + link->train.channel_equalized = true; + break; + } + + dw_dp_link_get_adjustments(link, status); + } + + return 0; +} + +static int dw_dp_link_downgrade(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + struct dw_dp_bridge_state *state; + + state = dw_dp_get_bridge_state(dp); + + switch (link->rate) { + case 162000: + return -EINVAL; + case 270000: + link->rate = 162000; + break; + case 540000: + link->rate = 270000; + break; + case 810000: + link->rate = 540000; + break; + } + + if (!dw_dp_bandwidth_ok(dp, &state->mode, state->bpp, link->lanes, + link->rate)) + return -E2BIG; + + return 0; +} + +static int dw_dp_link_train_full(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + int ret; + +retry: + dw_dp_link_train_init(&link->train); + + dev_dbg(dp->dev, "full-training link: %u lane%s at %u MHz\n", + link->lanes, (link->lanes > 1) ? 
"s" : "", link->rate / 100); + + ret = dw_dp_link_configure(dp); + if (ret < 0) { + dev_err(dp->dev, "failed to configure DP link: %d\n", ret); + return ret; + } + + ret = dw_dp_link_clock_recovery(dp); + if (ret < 0) { + dev_err(dp->dev, "clock recovery failed: %d\n", ret); + goto out; + } + + if (!link->train.clock_recovered) { + dev_err(dp->dev, "clock recovery failed, downgrading link\n"); + + ret = dw_dp_link_downgrade(dp); + if (ret < 0) + goto out; + else + goto retry; + } + + dev_dbg(dp->dev, "clock recovery succeeded\n"); + + ret = dw_dp_link_channel_equalization(dp); + if (ret < 0) { + dev_err(dp->dev, "channel equalization failed: %d\n", ret); + goto out; + } + + if (!link->train.channel_equalized) { + dev_err(dp->dev, "channel equalization failed, downgrading link\n"); + + ret = dw_dp_link_downgrade(dp); + if (ret < 0) + goto out; + else + goto retry; + } + + dev_dbg(dp->dev, "channel equalization succeeded\n"); + +out: + dw_dp_link_train_set_pattern(dp, DP_TRAINING_PATTERN_DISABLE); + return ret; +} + +static int dw_dp_link_train_fast(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + int ret; + u8 status[DP_LINK_STATUS_SIZE]; + u8 pattern; + + dw_dp_link_train_init(&link->train); + + dev_dbg(dp->dev, "fast-training link: %u lane%s at %u MHz\n", + link->lanes, (link->lanes > 1) ? "s" : "", link->rate / 100); + + ret = dw_dp_link_configure(dp); + if (ret < 0) { + dev_err(dp->dev, "failed to configure DP link: %d\n", ret); + return ret; + } + + ret = dw_dp_link_train_set_pattern(dp, DP_TRAINING_PATTERN_1); + if (ret) + goto out; + + usleep_range(500, 1000); + + if (link->caps.tps4_supported) + pattern = DP_TRAINING_PATTERN_4; + else if (link->caps.tps3_supported) + pattern = DP_TRAINING_PATTERN_3; + else + pattern = DP_TRAINING_PATTERN_2; + ret = dw_dp_link_train_set_pattern(dp, pattern); + if (ret) + goto out; + + usleep_range(500, 1000); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, status); + if (ret < 0) { + dev_err(dp->dev, "failed to read link status: %d\n", ret); + goto out; + } + + if (!drm_dp_clock_recovery_ok(status, link->lanes)) { + dev_err(dp->dev, "clock recovery failed\n"); + ret = -EIO; + goto out; + } + + if (!drm_dp_channel_eq_ok(status, link->lanes)) { + dev_err(dp->dev, "channel equalization failed\n"); + ret = -EIO; + goto out; + } + +out: + dw_dp_link_train_set_pattern(dp, DP_TRAINING_PATTERN_DISABLE); + return ret; +} + +static int dw_dp_link_train(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + int ret; + + if (link->caps.fast_training) { + if (dw_dp_link_train_valid(&link->train)) { + ret = dw_dp_link_train_fast(dp); + if (ret < 0) + dev_err(dp->dev, "fast link training failed: %d\n", ret); + else + return 0; + } + } + + ret = dw_dp_link_train_full(dp); + if (ret < 0) { + dev_err(dp->dev, "full link training failed: %d\n", ret); + return ret; + } + + return 0; +} + +static int dw_dp_send_sdp(struct dw_dp *dp, struct dw_dp_sdp *sdp) +{ + const u8 *payload = sdp->base.db; + u32 reg; + int i, nr; + + nr = find_first_zero_bit(dp->sdp_reg_bank, SDP_REG_BANK_SIZE); + if (nr < SDP_REG_BANK_SIZE) + set_bit(nr, dp->sdp_reg_bank); + else + return -EBUSY; + + reg = DW_DP_SDP_REGISTER_BANK + nr * 9 * 4; + + /* SDP header */ + regmap_write(dp->regmap, reg, get_unaligned_le32(&sdp->base.sdp_header)); + + /* SDP data payload */ + for (i = 1; i < 9; i++, payload += 4) + regmap_write(dp->regmap, reg + i * 4, + FIELD_PREP(SDP_REGS, get_unaligned_le32(payload))); + + if (sdp->flags & DW_DP_SDP_VERTICAL_INTERVAL) + regmap_update_bits(dp->regmap, 
DW_DP_SDP_VERTICAL_CTRL, + EN_VERTICAL_SDP << nr, + EN_VERTICAL_SDP << nr); + + if (sdp->flags & DW_DP_SDP_HORIZONTAL_INTERVAL) + regmap_update_bits(dp->regmap, DW_DP_SDP_HORIZONTAL_CTRL, + EN_HORIZONTAL_SDP << nr, + EN_HORIZONTAL_SDP << nr); + + return 0; +} + +static int dw_dp_send_vsc_sdp(struct dw_dp *dp) +{ + struct dw_dp_bridge_state *state; + struct dw_dp_sdp sdp = {}; + struct drm_dp_vsc_sdp vsc = {}; + + state = dw_dp_get_bridge_state(dp); + if (!state) + return -EINVAL; + + vsc.bpc = state->bpc; + + vsc.sdp_type = DP_SDP_VSC; + vsc.revision = 0x5; + vsc.length = 0x13; + vsc.content_type = DP_CONTENT_TYPE_NOT_DEFINED; + + sdp.flags = DW_DP_SDP_VERTICAL_INTERVAL; + + switch (state->color_format) { + case DRM_COLOR_FORMAT_YCBCR444: + vsc.pixelformat = DP_PIXELFORMAT_YUV444; + break; + case DRM_COLOR_FORMAT_YCBCR420: + vsc.pixelformat = DP_PIXELFORMAT_YUV420; + break; + case DRM_COLOR_FORMAT_YCBCR422: + vsc.pixelformat = DP_PIXELFORMAT_YUV422; + break; + case DRM_COLOR_FORMAT_RGB444: + default: + vsc.pixelformat = DP_PIXELFORMAT_RGB; + break; + } + + if (state->color_format == DRM_COLOR_FORMAT_RGB444) { + vsc.colorimetry = DP_COLORIMETRY_DEFAULT; + vsc.dynamic_range = DP_DYNAMIC_RANGE_VESA; + } else { + vsc.colorimetry = DP_COLORIMETRY_BT709_YCC; + vsc.dynamic_range = DP_DYNAMIC_RANGE_CTA; + } + + drm_dp_vsc_sdp_pack(&vsc, &sdp.base); + + return dw_dp_send_sdp(dp, &sdp); +} + +static int dw_dp_video_set_pixel_mode(struct dw_dp *dp) +{ + switch (dp->pixel_mode) { + case DW_DP_MP_SINGLE_PIXEL: + case DW_DP_MP_DUAL_PIXEL: + case DW_DP_MP_QUAD_PIXEL: + break; + default: + return -EINVAL; + } + + regmap_update_bits(dp->regmap, DW_DP_VSAMPLE_CTRL, PIXEL_MODE_SELECT, + FIELD_PREP(PIXEL_MODE_SELECT, dp->pixel_mode)); + + return 0; +} + +static bool dw_dp_video_need_vsc_sdp(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + struct dw_dp_bridge_state *state; + + state = dw_dp_get_bridge_state(dp); + if (!state) + return -EINVAL; + + if (!link->vsc_sdp_supported) + return false; + + if (state->color_format == DRM_COLOR_FORMAT_YCBCR420) + return true; + + return false; +} + +static int dw_dp_video_set_msa(struct dw_dp *dp, u8 color_format, u8 bpc, + u16 vstart, u16 hstart) +{ + u16 misc = 0; + + if (dw_dp_video_need_vsc_sdp(dp)) + misc |= DP_MSA_MISC_COLOR_VSC_SDP; + + switch (color_format) { + case DRM_COLOR_FORMAT_RGB444: + misc |= DP_MSA_MISC_COLOR_RGB; + break; + case DRM_COLOR_FORMAT_YCBCR444: + misc |= DP_MSA_MISC_COLOR_YCBCR_444_BT709; + break; + case DRM_COLOR_FORMAT_YCBCR422: + misc |= DP_MSA_MISC_COLOR_YCBCR_422_BT709; + break; + case DRM_COLOR_FORMAT_YCBCR420: + break; + default: + return -EINVAL; + } + + switch (bpc) { + case 6: + misc |= DP_MSA_MISC_6_BPC; + break; + case 8: + misc |= DP_MSA_MISC_8_BPC; + break; + case 10: + misc |= DP_MSA_MISC_10_BPC; + break; + case 12: + misc |= DP_MSA_MISC_12_BPC; + break; + case 16: + misc |= DP_MSA_MISC_16_BPC; + break; + default: + return -EINVAL; + } + + regmap_write(dp->regmap, DW_DP_VIDEO_MSA1, + FIELD_PREP(VSTART, vstart) | FIELD_PREP(HSTART, hstart)); + regmap_write(dp->regmap, DW_DP_VIDEO_MSA2, FIELD_PREP(MISC0, misc)); + regmap_write(dp->regmap, DW_DP_VIDEO_MSA3, FIELD_PREP(MISC1, misc >> 8)); + + return 0; +} + +static void dw_dp_video_disable(struct dw_dp *dp) +{ + regmap_update_bits(dp->regmap, DW_DP_VSAMPLE_CTRL, VIDEO_STREAM_ENABLE, + FIELD_PREP(VIDEO_STREAM_ENABLE, 0)); +} + +static int dw_dp_video_enable(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + struct dw_dp_bridge_state *state; + struct 
drm_display_mode *mode; + u8 color_format, bpc, bpp; + u8 init_threshold, vic; + u32 hstart, hactive, hblank, h_sync_width, h_front_porch; + u32 vstart, vactive, vblank, v_sync_width, v_front_porch; + u32 peak_stream_bandwidth, link_bandwidth; + u32 average_bytes_per_tu, average_bytes_per_tu_frac; + u32 ts, hblank_interval; + u32 value; + int ret; + + state = dw_dp_get_bridge_state(dp); + if (!state) + return -EINVAL; + + bpc = state->bpc; + bpp = state->bpp; + color_format = state->color_format; + mode = &state->mode; + + vstart = mode->vtotal - mode->vsync_start; + hstart = mode->htotal - mode->hsync_start; + + ret = dw_dp_video_set_pixel_mode(dp); + if (ret) + return ret; + + ret = dw_dp_video_set_msa(dp, color_format, bpc, vstart, hstart); + if (ret) + return ret; + + regmap_update_bits(dp->regmap, DW_DP_VSAMPLE_CTRL, VIDEO_MAPPING, + FIELD_PREP(VIDEO_MAPPING, state->video_mapping)); + + /* Configure DW_DP_VINPUT_POLARITY_CTRL register */ + value = 0; + if (mode->flags & DRM_MODE_FLAG_PHSYNC) + value |= FIELD_PREP(HSYNC_IN_POLARITY, 1); + if (mode->flags & DRM_MODE_FLAG_PVSYNC) + value |= FIELD_PREP(VSYNC_IN_POLARITY, 1); + regmap_write(dp->regmap, DW_DP_VINPUT_POLARITY_CTRL, value); + + /* Configure DW_DP_VIDEO_CONFIG1 register */ + hactive = mode->hdisplay; + hblank = mode->htotal - mode->hdisplay; + value = FIELD_PREP(HACTIVE, hactive) | FIELD_PREP(HBLANK, hblank); + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + value |= FIELD_PREP(I_P, 1); + vic = drm_match_cea_mode(mode); + if (vic == 5 || vic == 6 || vic == 7 || + vic == 10 || vic == 11 || vic == 20 || + vic == 21 || vic == 22 || vic == 39 || + vic == 25 || vic == 26 || vic == 40 || + vic == 44 || vic == 45 || vic == 46 || + vic == 50 || vic == 51 || vic == 54 || + vic == 55 || vic == 58 || vic == 59) + value |= R_V_BLANK_IN_OSC; + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG1, value); + + /* Configure DW_DP_VIDEO_CONFIG2 register */ + vblank = mode->vtotal - mode->vdisplay; + vactive = mode->vdisplay; + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG2, + FIELD_PREP(VBLANK, vblank) | FIELD_PREP(VACTIVE, vactive)); + + /* Configure DW_DP_VIDEO_CONFIG3 register */ + h_sync_width = mode->hsync_end - mode->hsync_start; + h_front_porch = mode->hsync_start - mode->hdisplay; + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG3, + FIELD_PREP(H_SYNC_WIDTH, h_sync_width) | + FIELD_PREP(H_FRONT_PORCH, h_front_porch)); + + /* Configure DW_DP_VIDEO_CONFIG4 register */ + v_sync_width = mode->vsync_end - mode->vsync_start; + v_front_porch = mode->vsync_start - mode->vdisplay; + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG4, + FIELD_PREP(V_SYNC_WIDTH, v_sync_width) | + FIELD_PREP(V_FRONT_PORCH, v_front_porch)); + + /* Configure DW_DP_VIDEO_CONFIG5 register */ + peak_stream_bandwidth = mode->clock * bpp / 8; + link_bandwidth = (link->rate / 1000) * link->lanes; + ts = peak_stream_bandwidth * 64 / link_bandwidth; + average_bytes_per_tu = ts / 1000; + average_bytes_per_tu_frac = ts / 100 - average_bytes_per_tu * 10; + if (dp->pixel_mode == DW_DP_MP_SINGLE_PIXEL) { + if (average_bytes_per_tu < 6) + init_threshold = 32; + else if (hblank <= 80 && color_format != DRM_COLOR_FORMAT_YCBCR420) + init_threshold = 12; + else if (hblank <= 40 && color_format == DRM_COLOR_FORMAT_YCBCR420) + init_threshold = 3; + else + init_threshold = 16; + } else { + u32 t1 = 0, t2 = 0, t3 = 0; + + switch (bpc) { + case 6: + t1 = (4 * 1000 / 9) * link->lanes; + break; + case 8: + if (color_format == DRM_COLOR_FORMAT_YCBCR422) { + t1 = (1000 / 2) * link->lanes; + } else { + if 
(dp->pixel_mode == DW_DP_MP_DUAL_PIXEL) + t1 = (1000 / 3) * link->lanes; + else + t1 = (3000 / 16) * link->lanes; + } + break; + case 10: + if (color_format == DRM_COLOR_FORMAT_YCBCR422) + t1 = (2000 / 5) * link->lanes; + else + t1 = (4000 / 15) * link->lanes; + break; + case 12: + if (color_format == DRM_COLOR_FORMAT_YCBCR422) { + if (dp->pixel_mode == DW_DP_MP_DUAL_PIXEL) + t1 = (1000 / 6) * link->lanes; + else + t1 = (1000 / 3) * link->lanes; + } else { + t1 = (2000 / 9) * link->lanes; + } + break; + case 16: + if (color_format != DRM_COLOR_FORMAT_YCBCR422 && + dp->pixel_mode == DW_DP_MP_DUAL_PIXEL) + t1 = (1000 / 6) * link->lanes; + else + t1 = (1000 / 4) * link->lanes; + break; + default: + return -EINVAL; + } + + if (color_format == DRM_COLOR_FORMAT_YCBCR420) + t2 = (link->rate / 4) * 1000 / (mode->clock / 2); + else + t2 = (link->rate / 4) * 1000 / mode->clock; + + if (average_bytes_per_tu_frac) + t3 = average_bytes_per_tu + 1; + else + t3 = average_bytes_per_tu; + init_threshold = t1 * t2 * t3 / (1000 * 1000); + if (init_threshold <= 16 || average_bytes_per_tu < 10) + init_threshold = 40; + } + + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG5, + FIELD_PREP(INIT_THRESHOLD_HI, init_threshold >> 6) | + FIELD_PREP(AVERAGE_BYTES_PER_TU_FRAC, average_bytes_per_tu_frac) | + FIELD_PREP(INIT_THRESHOLD, init_threshold) | + FIELD_PREP(AVERAGE_BYTES_PER_TU, average_bytes_per_tu)); + + /* Configure DW_DP_VIDEO_HBLANK_INTERVAL register */ + hblank_interval = hblank * (link->rate / 4) / mode->clock; + regmap_write(dp->regmap, DW_DP_VIDEO_HBLANK_INTERVAL, + FIELD_PREP(HBLANK_INTERVAL_EN, 1) | + FIELD_PREP(HBLANK_INTERVAL, hblank_interval)); + + /* Video stream enable */ + regmap_update_bits(dp->regmap, DW_DP_VSAMPLE_CTRL, VIDEO_STREAM_ENABLE, + FIELD_PREP(VIDEO_STREAM_ENABLE, 1)); + + if (dw_dp_video_need_vsc_sdp(dp)) + dw_dp_send_vsc_sdp(dp); + + return 0; +} + +static void dw_dp_hpd_init(struct dw_dp *dp) +{ + /* Enable all HPD interrupts */ + regmap_update_bits(dp->regmap, DW_DP_HPD_INTERRUPT_ENABLE, + HPD_UNPLUG_EN | HPD_PLUG_EN | HPD_IRQ_EN, + FIELD_PREP(HPD_UNPLUG_EN, 1) | + FIELD_PREP(HPD_PLUG_EN, 1) | + FIELD_PREP(HPD_IRQ_EN, 1)); + + /* Enable all top-level interrupts */ + regmap_update_bits(dp->regmap, DW_DP_GENERAL_INTERRUPT_ENABLE, + HPD_EVENT_EN, FIELD_PREP(HPD_EVENT_EN, 1)); +} + +static void dw_dp_aux_init(struct dw_dp *dp) +{ + regmap_update_bits(dp->regmap, DW_DP_GENERAL_INTERRUPT_ENABLE, + AUX_REPLY_EVENT_EN, FIELD_PREP(AUX_REPLY_EVENT_EN, 1)); +} + +static void dw_dp_init_hw(struct dw_dp *dp) +{ + regmap_update_bits(dp->regmap, DW_DP_CCTL, DEFAULT_FAST_LINK_TRAIN_EN, + FIELD_PREP(DEFAULT_FAST_LINK_TRAIN_EN, 0)); + + dw_dp_hpd_init(dp); + dw_dp_aux_init(dp); +} + +static int dw_dp_aux_write_data(struct dw_dp *dp, const u8 *buffer, size_t size) +{ + size_t i, j; + + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); + u32 value = 0; + + for (j = 0; j < num; j++) + value |= buffer[i * 4 + j] << (j * 8); + + regmap_write(dp->regmap, DW_DP_AUX_DATA0 + i * 4, value); + } + + return size; +} + +static int dw_dp_aux_read_data(struct dw_dp *dp, u8 *buffer, size_t size) +{ + size_t i, j; + + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); + u32 value; + + regmap_read(dp->regmap, DW_DP_AUX_DATA0 + i * 4, &value); + + for (j = 0; j < num; j++) + buffer[i * 4 + j] = value >> (j * 8); + } + + return size; +} + +static ssize_t dw_dp_aux_transfer(struct drm_dp_aux *aux, + struct drm_dp_aux_msg *msg) +{ + struct 
dw_dp *dp = container_of(aux, struct dw_dp, aux);
+ unsigned long timeout = msecs_to_jiffies(10);
+ u32 status, value;
+ ssize_t ret = 0;
+
+ if (WARN_ON(msg->size > 16))
+ return -E2BIG;
+
+ switch (msg->request & ~DP_AUX_I2C_MOT) {
+ case DP_AUX_NATIVE_WRITE:
+ case DP_AUX_I2C_WRITE:
+ case DP_AUX_I2C_WRITE_STATUS_UPDATE:
+ ret = dw_dp_aux_write_data(dp, msg->buffer, msg->size);
+ if (ret < 0)
+ return ret;
+ break;
+ case DP_AUX_NATIVE_READ:
+ case DP_AUX_I2C_READ:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (msg->size > 0)
+ value = FIELD_PREP(AUX_LEN_REQ, msg->size - 1);
+ else
+ value = FIELD_PREP(I2C_ADDR_ONLY, 1);
+ value |= FIELD_PREP(AUX_CMD_TYPE, msg->request);
+ value |= FIELD_PREP(AUX_ADDR, msg->address);
+ regmap_write(dp->regmap, DW_DP_AUX_CMD, value);
+
+ status = wait_for_completion_timeout(&dp->complete, timeout);
+ if (!status) {
+ dev_err(dp->dev, "timeout waiting for AUX reply\n");
+ return -ETIMEDOUT;
+ }
+
+ regmap_read(dp->regmap, DW_DP_AUX_STATUS, &value);
+ if (value & AUX_TIMEOUT)
+ return -ETIMEDOUT;
+
+ msg->reply = FIELD_GET(AUX_STATUS, value);
+
+ if (msg->size > 0 && msg->reply == DP_AUX_NATIVE_REPLY_ACK) {
+ if (msg->request & DP_AUX_I2C_READ) {
+ size_t count = FIELD_GET(AUX_BYTES_READ, value) - 1;
+
+ if (count != msg->size)
+ return -EBUSY;
+
+ ret = dw_dp_aux_read_data(dp, msg->buffer, count);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Limits for the video timing for DP:
+ * 1. the hfp should be aligned to 2 pixels;
+ * 2. the minimum hsync should be 9 pixels;
+ * 3. the minimum hbp should be 16 pixels;
+ */
+static int dw_dp_bridge_atomic_check(struct drm_bridge *bridge,
+ struct drm_bridge_state *bridge_state,
+ struct drm_crtc_state *crtc_state,
+ struct drm_connector_state *conn_state)
+{
+ struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
+ struct dw_dp *dp = bridge_to_dp(bridge);
+ struct dw_dp_bridge_state *state;
+ const struct dw_dp_output_format *fmt;
+ struct drm_display_mode *mode;
+ int min_hbp = 16;
+ int min_hsync = 9;
+
+ state = to_dw_dp_bridge_state(bridge_state);
+ mode = &state->mode;
+
+ fmt = dw_dp_get_output_format(bridge_state->output_bus_cfg.format);
+ if (!fmt)
+ return -EINVAL;
+
+ state->video_mapping = fmt->video_mapping;
+ state->color_format = fmt->color_format;
+ state->bpc = fmt->bpc;
+ state->bpp = fmt->bpp;
+
+ if ((adjusted_mode->hsync_start - adjusted_mode->hdisplay) & 0x1) {
+ adjusted_mode->hsync_start += 1;
+ dev_warn(dp->dev, "hfp is not 2 pixel aligned, fixup to aligned hfp\n");
+ }
+
+ if (adjusted_mode->hsync_end - adjusted_mode->hsync_start < min_hsync) {
+ adjusted_mode->hsync_end = adjusted_mode->hsync_start + min_hsync;
+ dev_warn(dp->dev, "hsync is too narrow, fixup to min hsync:%d\n", min_hsync);
+ }
+
+ if (adjusted_mode->htotal - adjusted_mode->hsync_end < min_hbp) {
+ adjusted_mode->htotal = adjusted_mode->hsync_end + min_hbp;
+ dev_warn(dp->dev, "hbp is too narrow, fixup to min hbp:%d\n", min_hbp);
+ }
+
+ drm_mode_copy(mode, adjusted_mode);
+
+ return 0;
+}
+
+static enum drm_mode_status dw_dp_bridge_mode_valid(struct drm_bridge *bridge,
+ const struct drm_display_info *info,
+ const struct drm_display_mode *mode)
+{
+ struct dw_dp *dp = bridge_to_dp(bridge);
+ struct dw_dp_link *link = &dp->link;
+ u32 min_bpp;
+
+ if (info->color_formats & DRM_COLOR_FORMAT_YCBCR420 &&
+ link->vsc_sdp_supported &&
+ (drm_mode_is_420_only(info, mode) || drm_mode_is_420_also(info, mode)))
+ min_bpp = 12;
+ else if (info->color_formats & DRM_COLOR_FORMAT_YCBCR422)
+ min_bpp =
16; + else if (info->color_formats & DRM_COLOR_FORMAT_RGB444) + min_bpp = 18; + else + min_bpp = 24; + + if (!link->vsc_sdp_supported && + drm_mode_is_420_only(info, mode)) + return MODE_NO_420; + + if (!dw_dp_bandwidth_ok(dp, mode, min_bpp, link->lanes, link->rate)) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + +static bool dw_dp_needs_link_retrain(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + u8 link_status[DP_LINK_STATUS_SIZE]; + + if (!dw_dp_link_train_valid(&link->train)) + return false; + + if (drm_dp_dpcd_read_link_status(&dp->aux, link_status) < 0) + return false; + + /* Retrain if Channel EQ or CR not ok */ + return !drm_dp_channel_eq_ok(link_status, dp->link.lanes); +} + +static void dw_dp_link_disable(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + + if (dw_dp_hpd_detect(dp)) + drm_dp_link_power_down(&dp->aux, dp->link.revision); + + dw_dp_phy_xmit_enable(dp, 0); + + phy_power_off(dp->phy); + + link->train.clock_recovered = false; + link->train.channel_equalized = false; +} + +static int dw_dp_link_enable(struct dw_dp *dp) +{ + int ret; + + ret = phy_power_on(dp->phy); + if (ret) + return ret; + + ret = drm_dp_link_power_up(&dp->aux, dp->link.revision); + if (ret < 0) + return ret; + + ret = dw_dp_link_train(dp); + + return ret; +} + +static void dw_dp_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + struct drm_connector *connector; + struct drm_connector_state *conn_state; + int ret; + + connector = drm_atomic_get_new_connector_for_encoder(state, bridge->encoder); + if (!connector) { + dev_err(dp->dev, "failed to get connector\n"); + return; + } + + conn_state = drm_atomic_get_new_connector_state(state, connector); + if (!conn_state) { + dev_err(dp->dev, "failed to get connector state\n"); + return; + } + + set_bit(0, dp->sdp_reg_bank); + + ret = dw_dp_link_enable(dp); + if (ret < 0) { + dev_err(dp->dev, "failed to enable link: %d\n", ret); + return; + } + + ret = dw_dp_video_enable(dp); + if (ret < 0) { + dev_err(dp->dev, "failed to enable video: %d\n", ret); + return; + } +} + +static void dw_dp_reset(struct dw_dp *dp) +{ + int val; + + disable_irq(dp->irq); + regmap_update_bits(dp->regmap, DW_DP_SOFT_RESET_CTRL, CONTROLLER_RESET, + FIELD_PREP(CONTROLLER_RESET, 1)); + usleep_range(10, 20); + regmap_update_bits(dp->regmap, DW_DP_SOFT_RESET_CTRL, CONTROLLER_RESET, + FIELD_PREP(CONTROLLER_RESET, 0)); + + dw_dp_init_hw(dp); + regmap_read_poll_timeout(dp->regmap, DW_DP_HPD_STATUS, val, + FIELD_GET(HPD_HOT_PLUG, val), 200, 200000); + regmap_write(dp->regmap, DW_DP_HPD_STATUS, HPD_HOT_PLUG); + enable_irq(dp->irq); +} + +static void dw_dp_bridge_atomic_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + + dw_dp_video_disable(dp); + dw_dp_link_disable(dp); + bitmap_zero(dp->sdp_reg_bank, SDP_REG_BANK_SIZE); + dw_dp_reset(dp); +} + +static bool dw_dp_hpd_detect_link(struct dw_dp *dp, struct drm_connector *connector) +{ + int ret; + + ret = phy_power_on(dp->phy); + if (ret < 0) + return false; + ret = dw_dp_link_parse(dp, connector); + phy_power_off(dp->phy); + + return !ret; +} + +static enum drm_connector_status dw_dp_bridge_detect(struct drm_bridge *bridge, + struct drm_connector *connector) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + + if (!dw_dp_hpd_detect(dp)) + return connector_status_disconnected; + + if (!dw_dp_hpd_detect_link(dp, connector)) + return connector_status_disconnected; + + return 
connector_status_connected; +} + +static const struct drm_edid *dw_dp_bridge_edid_read(struct drm_bridge *bridge, + struct drm_connector *connector) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + const struct drm_edid *edid; + int ret; + + ret = phy_power_on(dp->phy); + if (ret) + return NULL; + + edid = drm_edid_read_ddc(connector, &dp->aux.ddc); + + phy_power_off(dp->phy); + + return edid; +} + +static u32 *dw_dp_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + unsigned int *num_output_fmts) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + struct dw_dp_link *link = &dp->link; + struct drm_display_info *di = &conn_state->connector->display_info; + struct drm_display_mode mode = crtc_state->mode; + const struct dw_dp_output_format *fmt; + u32 i, j = 0; + u32 *output_fmts; + + *num_output_fmts = 0; + + output_fmts = kcalloc(ARRAY_SIZE(dw_dp_output_formats), sizeof(*output_fmts), GFP_KERNEL); + if (!output_fmts) + return NULL; + + for (i = 0; i < ARRAY_SIZE(dw_dp_output_formats); i++) { + fmt = &dw_dp_output_formats[i]; + + if (fmt->bpc > conn_state->max_bpc) + continue; + + if (!(fmt->color_format & di->color_formats)) + continue; + + if (fmt->color_format == DRM_COLOR_FORMAT_YCBCR420 && + !link->vsc_sdp_supported) + continue; + + if (fmt->color_format != DRM_COLOR_FORMAT_YCBCR420 && + drm_mode_is_420_only(di, &mode)) + continue; + + if (!dw_dp_bandwidth_ok(dp, &mode, fmt->bpp, link->lanes, link->rate)) + continue; + + output_fmts[j++] = fmt->bus_format; + } + + *num_output_fmts = j; + + return output_fmts; +} + +static struct drm_bridge_state *dw_dp_bridge_atomic_duplicate_state(struct drm_bridge *bridge) +{ + struct dw_dp_bridge_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_bridge_duplicate_state(bridge, &state->base); + + return &state->base; +} + +static const struct drm_bridge_funcs dw_dp_bridge_funcs = { + .atomic_duplicate_state = dw_dp_bridge_atomic_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_get_input_bus_fmts = drm_atomic_helper_bridge_propagate_bus_fmt, + .atomic_get_output_bus_fmts = dw_dp_bridge_atomic_get_output_bus_fmts, + .atomic_check = dw_dp_bridge_atomic_check, + .mode_valid = dw_dp_bridge_mode_valid, + .atomic_enable = dw_dp_bridge_atomic_enable, + .atomic_disable = dw_dp_bridge_atomic_disable, + .detect = dw_dp_bridge_detect, + .edid_read = dw_dp_bridge_edid_read, +}; + +static int dw_dp_link_retrain(struct dw_dp *dp) +{ + struct drm_device *dev = dp->bridge.dev; + struct drm_modeset_acquire_ctx ctx; + int ret; + + if (!dw_dp_needs_link_retrain(dp)) + return 0; + + dev_dbg(dp->dev, "Retraining link\n"); + + drm_modeset_acquire_init(&ctx, 0); + for (;;) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(&ctx); + } + + if (!ret) + ret = dw_dp_link_train(dp); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + + return ret; +} + +static void dw_dp_hpd_work(struct work_struct *work) +{ + struct dw_dp *dp = container_of(work, struct dw_dp, hpd_work); + bool long_hpd; + int ret; + + mutex_lock(&dp->irq_lock); + long_hpd = dp->hotplug.long_hpd; + mutex_unlock(&dp->irq_lock); + + dev_dbg(dp->dev, "[drm] Get hpd irq - %s\n", long_hpd ? 
"long" : "short"); + + if (!long_hpd) { + if (dw_dp_needs_link_retrain(dp)) { + ret = dw_dp_link_retrain(dp); + if (ret) + dev_warn(dp->dev, "Retrain link failed\n"); + } + } else { + drm_helper_hpd_irq_event(dp->bridge.dev); + } +} + +static void dw_dp_handle_hpd_event(struct dw_dp *dp) +{ + u32 value; + + mutex_lock(&dp->irq_lock); + regmap_read(dp->regmap, DW_DP_HPD_STATUS, &value); + + if (value & HPD_IRQ) { + dev_dbg(dp->dev, "IRQ from the HPD\n"); + dp->hotplug.long_hpd = false; + regmap_write(dp->regmap, DW_DP_HPD_STATUS, HPD_IRQ); + } + + if (value & HPD_HOT_PLUG) { + dev_dbg(dp->dev, "Hot plug detected\n"); + dp->hotplug.long_hpd = true; + regmap_write(dp->regmap, DW_DP_HPD_STATUS, HPD_HOT_PLUG); + } + + if (value & HPD_HOT_UNPLUG) { + dev_dbg(dp->dev, "Unplug detected\n"); + dp->hotplug.long_hpd = true; + regmap_write(dp->regmap, DW_DP_HPD_STATUS, HPD_HOT_UNPLUG); + } + mutex_unlock(&dp->irq_lock); + + schedule_work(&dp->hpd_work); +} + +static irqreturn_t dw_dp_irq(int irq, void *data) +{ + struct dw_dp *dp = data; + u32 value; + + regmap_read(dp->regmap, DW_DP_GENERAL_INTERRUPT, &value); + if (!value) + return IRQ_NONE; + + if (value & HPD_EVENT) + dw_dp_handle_hpd_event(dp); + + if (value & AUX_REPLY_EVENT) { + regmap_write(dp->regmap, DW_DP_GENERAL_INTERRUPT, AUX_REPLY_EVENT); + complete(&dp->complete); + } + + return IRQ_HANDLED; +} + +static const struct regmap_range dw_dp_readable_ranges[] = { + regmap_reg_range(DW_DP_VERSION_NUMBER, DW_DP_ID), + regmap_reg_range(DW_DP_CONFIG_REG1, DW_DP_CONFIG_REG3), + regmap_reg_range(DW_DP_CCTL, DW_DP_SOFT_RESET_CTRL), + regmap_reg_range(DW_DP_VSAMPLE_CTRL, DW_DP_VIDEO_HBLANK_INTERVAL), + regmap_reg_range(DW_DP_AUD_CONFIG1, DW_DP_AUD_CONFIG1), + regmap_reg_range(DW_DP_SDP_VERTICAL_CTRL, DW_DP_SDP_STATUS_EN), + regmap_reg_range(DW_DP_PHYIF_CTRL, DW_DP_PHYIF_PWRDOWN_CTRL), + regmap_reg_range(DW_DP_AUX_CMD, DW_DP_AUX_DATA3), + regmap_reg_range(DW_DP_GENERAL_INTERRUPT, DW_DP_HPD_INTERRUPT_ENABLE), +}; + +static const struct regmap_access_table dw_dp_readable_table = { + .yes_ranges = dw_dp_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(dw_dp_readable_ranges), +}; + +static const struct regmap_config dw_dp_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .fast_io = true, + .max_register = DW_DP_MAX_REGISTER, + .rd_table = &dw_dp_readable_table, +}; + +static void dw_dp_phy_exit(void *data) +{ + struct dw_dp *dp = data; + + phy_exit(dp->phy); +} + +struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder, + const struct dw_dp_plat_data *plat_data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_dp *dp; + struct drm_bridge *bridge; + void __iomem *res; + int ret; + + dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); + if (!dp) + return ERR_PTR(-ENOMEM); + + dp = devm_drm_bridge_alloc(dev, struct dw_dp, bridge, &dw_dp_bridge_funcs); + if (IS_ERR(dp)) + return ERR_CAST(dp); + + dp->dev = dev; + dp->pixel_mode = DW_DP_MP_QUAD_PIXEL; + + dp->plat_data.max_link_rate = plat_data->max_link_rate; + bridge = &dp->bridge; + mutex_init(&dp->irq_lock); + INIT_WORK(&dp->hpd_work, dw_dp_hpd_work); + init_completion(&dp->complete); + + res = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(res)) + return ERR_CAST(res); + + dp->regmap = devm_regmap_init_mmio(dev, res, &dw_dp_regmap_config); + if (IS_ERR(dp->regmap)) { + dev_err_probe(dev, PTR_ERR(dp->regmap), "failed to create regmap\n"); + return ERR_CAST(dp->regmap); + } + + dp->phy = devm_of_phy_get(dev, dev->of_node, NULL); + if 
(IS_ERR(dp->phy)) {
+ dev_err_probe(dev, PTR_ERR(dp->phy), "failed to get phy\n");
+ return ERR_CAST(dp->phy);
+ }
+
+ dp->apb_clk = devm_clk_get_enabled(dev, "apb");
+ if (IS_ERR(dp->apb_clk)) {
+ dev_err_probe(dev, PTR_ERR(dp->apb_clk), "failed to get apb clock\n");
+ return ERR_CAST(dp->apb_clk);
+ }
+
+ dp->aux_clk = devm_clk_get_enabled(dev, "aux");
+ if (IS_ERR(dp->aux_clk)) {
+ dev_err_probe(dev, PTR_ERR(dp->aux_clk), "failed to get aux clock\n");
+ return ERR_CAST(dp->aux_clk);
+ }
+
+ dp->i2s_clk = devm_clk_get(dev, "i2s");
+ if (IS_ERR(dp->i2s_clk)) {
+ dev_err_probe(dev, PTR_ERR(dp->i2s_clk), "failed to get i2s clock\n");
+ return ERR_CAST(dp->i2s_clk);
+ }
+
+ dp->spdif_clk = devm_clk_get(dev, "spdif");
+ if (IS_ERR(dp->spdif_clk)) {
+ dev_err_probe(dev, PTR_ERR(dp->spdif_clk), "failed to get spdif clock\n");
+ return ERR_CAST(dp->spdif_clk);
+ }
+
+ dp->hdcp_clk = devm_clk_get(dev, "hdcp");
+ if (IS_ERR(dp->hdcp_clk)) {
+ dev_err_probe(dev, PTR_ERR(dp->hdcp_clk), "failed to get hdcp clock\n");
+ return ERR_CAST(dp->hdcp_clk);
+ }
+
+ dp->rstc = devm_reset_control_get(dev, NULL);
+ if (IS_ERR(dp->rstc)) {
+ dev_err_probe(dev, PTR_ERR(dp->rstc), "failed to get reset control\n");
+ return ERR_CAST(dp->rstc);
+ }
+
+ bridge->of_node = dev->of_node;
+ bridge->ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_HPD;
+ bridge->type = DRM_MODE_CONNECTOR_DisplayPort;
+ bridge->ycbcr_420_allowed = true;
+
+ dp->aux.dev = dev;
+ dp->aux.drm_dev = encoder->dev;
+ dp->aux.name = dev_name(dev);
+ dp->aux.transfer = dw_dp_aux_transfer;
+ ret = drm_dp_aux_register(&dp->aux);
+ if (ret) {
+ dev_err_probe(dev, ret, "Aux register failed\n");
+ return ERR_PTR(ret);
+ }
+
+ ret = drm_bridge_attach(encoder, bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+ if (ret) {
+ dev_err_probe(dev, ret, "Failed to attach bridge\n");
+ return ERR_PTR(ret);
+ }
+
+ dw_dp_init_hw(dp);
+
+ ret = phy_init(dp->phy);
+ if (ret) {
+ dev_err_probe(dev, ret, "phy init failed\n");
+ return ERR_PTR(ret);
+ }
+
+ ret = devm_add_action_or_reset(dev, dw_dp_phy_exit, dp);
+ if (ret)
+ return ERR_PTR(ret);
+
+ dp->irq = platform_get_irq(pdev, 0);
+ if (dp->irq < 0)
+ return ERR_PTR(dp->irq);
+
+ ret = devm_request_threaded_irq(dev, dp->irq, NULL, dw_dp_irq,
+ IRQF_ONESHOT, dev_name(dev), dp);
+ if (ret) {
+ dev_err_probe(dev, ret, "failed to request irq\n");
+ return ERR_PTR(ret);
+ }
+
+ return dp;
+}
+EXPORT_SYMBOL_GPL(dw_dp_bind);
+
+MODULE_AUTHOR("Andy Yan <andyshrk@163.com>");
+MODULE_DESCRIPTION("DW DP Core Library");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 464390372b34..ae0d08e5e960 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -393,6 +393,17 @@ static int __maybe_unused ti_sn65dsi86_resume(struct device *dev)
 gpiod_set_value_cansleep(pdata->enable_gpio, 1);
 /*
+ * After EN is deasserted and an external clock is detected, the bridge
+ * will sample GPIO3:1 to determine its frequency. The driver will
+ * overwrite this setting in ti_sn_bridge_set_refclk_freq(). But this is
+ * racy. Thus we have to wait a couple of us. According to the datasheet
+ * the GPIO lines have to be stable for at least 5 us (td5) but it seems that
+ * is not enough and the refclk frequency value is still lost or
+ * overwritten by the bridge itself. Waiting for 20us seems to work.
+ */ + usleep_range(20, 30); + + /* * If we have a reference clock we can enable communication w/ the * panel (including the aux channel) w/out any need for an input clock * so we can do it in resume which lets us read the EDID before diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 091c5335355a..baacd21e7341 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -751,12 +751,11 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, return ERR_PTR(-EINVAL); } - if (!drm_bridge_get_next_bridge(bridge)) + if (drm_bridge_is_last(bridge)) connector_type = bridge->type; #ifdef CONFIG_OF - if (!drm_bridge_get_next_bridge(bridge) && - bridge->of_node) + if (drm_bridge_is_last(bridge) && bridge->of_node) connector->fwnode = fwnode_handle_get(of_fwnode_handle(bridge->of_node)); #endif @@ -777,8 +776,6 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (!connector->ycbcr_420_allowed) supported_formats &= ~BIT(HDMI_COLORSPACE_YUV420); - bridge = bridge_connector->bridge_hdmi; - ret = drmm_connector_hdmi_init(drm, connector, bridge_connector->bridge_hdmi->vendor, bridge_connector->bridge_hdmi->product, diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 1ecc3df7e316..4aaeae4fa03c 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -3962,6 +3962,7 @@ int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_bac int ret; unsigned int offset = DP_EDP_BACKLIGHT_BRIGHTNESS_MSB; u8 buf[3] = { 0 }; + size_t len = 2; /* The panel uses the PWM for controlling brightness levels */ if (!(bl->aux_set || bl->luminance_set)) @@ -3974,6 +3975,7 @@ int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_bac buf[1] = (level & 0x00ff00) >> 8; buf[2] = (level & 0xff0000) >> 16; offset = DP_EDP_PANEL_TARGET_LUMINANCE_VALUE; + len = 3; } else if (bl->lsb_reg_used) { buf[0] = (level & 0xff00) >> 8; buf[1] = (level & 0x00ff); @@ -3981,7 +3983,7 @@ int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_bac buf[0] = level; } - ret = drm_dp_dpcd_write_data(aux, offset, buf, sizeof(buf)); + ret = drm_dp_dpcd_write_data(aux, offset, buf, len); if (ret < 0) { drm_err(aux->drm_dev, "%s: Failed to write aux backlight level: %d\n", diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index ecc73d52bfae..85dbdaa4a2e2 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1078,19 +1078,20 @@ int drm_atomic_set_property(struct drm_atomic_state *state, } if (async_flip) { - /* check if the prop does a nop change */ - if ((prop != config->prop_fb_id && - prop != config->prop_in_fence_fd && - prop != config->prop_fb_damage_clips)) { - ret = drm_atomic_plane_get_property(plane, plane_state, - prop, &old_val); - ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); - } + /* no-op changes are always allowed */ + ret = drm_atomic_plane_get_property(plane, plane_state, + prop, &old_val); + ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); - /* ask the driver if this non-primary plane is supported */ - if (plane->type != DRM_PLANE_TYPE_PRIMARY) { - ret = -EINVAL; + /* fail everything that isn't no-op or a pure flip */ + if (ret && prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != 
config->prop_fb_damage_clips) { + break; + } + if (ret && plane->type != DRM_PLANE_TYPE_PRIMARY) { + /* ask the driver if this non-primary plane is supported */ if (plane_funcs && plane_funcs->atomic_async_check) ret = plane_funcs->atomic_async_check(plane, state, true); diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index dd439d55177a..d031447eebc9 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -1435,6 +1435,9 @@ static void drm_bridge_debugfs_show_bridge(struct drm_printer *p, unsigned int idx) { drm_printf(p, "bridge[%u]: %ps\n", idx, bridge->funcs); + + drm_printf(p, "\trefcount: %u\n", kref_read(&bridge->refcount)); + drm_printf(p, "\ttype: [%d] %s\n", bridge->type, drm_get_connector_type_name(bridge->type)); diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 37a3270bc3c2..131c1c9ae92f 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -817,6 +817,40 @@ void drm_crtc_load_palette_8(struct drm_crtc *crtc, const struct drm_color_lut * } EXPORT_SYMBOL(drm_crtc_load_palette_8); +static void fill_palette_332(struct drm_crtc *crtc, u16 r, u16 g, u16 b, + drm_crtc_set_lut_func set_palette) +{ + unsigned int i = (r << 5) | (g << 2) | b; /* 8-bit palette index */ + + /* Expand R (3-bit) G (3-bit) and B (2-bit) values to 16-bit values */ + r = (r << 13) | (r << 10) | (r << 7) | (r << 4) | (r << 1) | (r >> 2); + g = (g << 13) | (g << 10) | (g << 7) | (g << 4) | (g << 1) | (g >> 2); + b = (b << 14) | (b << 12) | (b << 10) | (b << 8) | (b << 6) | (b << 4) | (b << 2) | b; + + set_palette(crtc, i, r, g, b); +} + +/** + * drm_crtc_fill_palette_332 - Programs a default palette for R332-like formats + * @crtc: The displaying CRTC + * @set_palette: Callback for programming the hardware gamma LUT + * + * Programs an RGB332 palette to hardware. + */ +void drm_crtc_fill_palette_332(struct drm_crtc *crtc, drm_crtc_set_lut_func set_palette) +{ + unsigned int r, g, b; + + /* Limits of 8-8-4 are the maximum number of values for each channel. 
*/ + for (r = 0; r < 8; ++r) { + for (g = 0; g < 8; ++g) { + for (b = 0; b < 4; ++b) + fill_palette_332(crtc, r, g, b, set_palette); + } + } +} +EXPORT_SYMBOL(drm_crtc_fill_palette_332); + static void fill_palette_8(struct drm_crtc *crtc, unsigned int i, drm_crtc_set_lut_func set_palette) { diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index cdd591b11488..0ac723a46a91 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -532,6 +532,8 @@ static const char *drm_get_wedge_recovery(unsigned int opt) return "rebind"; case DRM_WEDGE_RECOVERY_BUS_RESET: return "bus-reset"; + case DRM_WEDGE_RECOVERY_VENDOR: + return "vendor-specific"; default: return NULL; } diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 8f3daf38ca63..006836554cc2 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -1243,6 +1243,9 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d } else if (dst_format == DRM_FORMAT_BGRX8888) { drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; + } else if (dst_format == DRM_FORMAT_RGB332) { + drm_fb_xrgb8888_to_rgb332(dst, dst_pitch, src, fb, clip, state); + return 0; } } @@ -1253,6 +1256,25 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d } EXPORT_SYMBOL(drm_fb_blit); +static void drm_fb_gray8_to_gray2_line(void *dbuf, const void *sbuf, unsigned int pixels) +{ + u8 *dbuf8 = dbuf; + const u8 *sbuf8 = sbuf; + u8 px; + + while (pixels) { + unsigned int i, bits = min(pixels, 4U); + u8 byte = 0; + + for (i = 0; i < bits; i++, pixels--) { + byte >>= 2; + px = (*sbuf8++ * 3 + 127) / 255; + byte |= (px &= 0x03) << 6; + } + *dbuf8++ = byte; + } +} + static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int pixels) { u8 *dbuf8 = dbuf; @@ -1359,3 +1381,92 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc } } EXPORT_SYMBOL(drm_fb_xrgb8888_to_mono); + +/** + * drm_fb_xrgb8888_to_gray2 - Convert XRGB8888 to gray2 + * @dst: Array of gray2 destination buffer + * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines + * within @dst; can be NULL if scanlines are stored next to each other. + * @src: Array of XRGB8888 source buffers + * @fb: DRM framebuffer + * @clip: Clip rectangle area to copy + * @state: Transform and conversion state + * + * This function copies parts of a framebuffer to display memory and converts the + * color format during the process. Destination and framebuffer formats must match. The + * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at + * least as many entries as there are planes in @fb's format. Each entry stores the + * value for the format's respective color plane at the same index. + * + * This function does not apply clipping on @dst (i.e. the destination is at the + * top-left corner). The first pixel (upper left corner of the clip rectangle) will + * be converted and copied to the two first bits (LSB) in the first byte of the gray2 + * destination buffer. If the caller requires that the first pixel in a byte must + * be located at an x-coordinate that is a multiple of 8, then the caller must take + * care itself of supplying a suitable clip rectangle. + * + * DRM doesn't have native gray2 support. Drivers can use this function for + * gray2 devices that don't support XRGB8888 natively. 
Such drivers can + * announce the commonly supported XR24 format to userspace and use this function + * to convert to the native format. + * + */ +void drm_fb_xrgb8888_to_gray2(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, const struct drm_framebuffer *fb, + const struct drm_rect *clip, struct drm_format_conv_state *state) +{ + static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { + 0, 0, 0, 0 + }; + unsigned int linepixels = drm_rect_width(clip); + unsigned int lines = drm_rect_height(clip); + unsigned int cpp = fb->format->cpp[0]; + unsigned int len_src32 = linepixels * cpp; + struct drm_device *dev = fb->dev; + void *vaddr = src[0].vaddr; + unsigned int dst_pitch_0; + unsigned int y; + u8 *gray2 = dst[0].vaddr, *gray8; + u32 *src32; + + if (drm_WARN_ON(dev, fb->format->format != DRM_FORMAT_XRGB8888)) + return; + + if (!dst_pitch) + dst_pitch = default_dst_pitch; + dst_pitch_0 = dst_pitch[0]; + + /* + * The gray2 destination buffer contains 2 bit per pixel + */ + if (!dst_pitch_0) + dst_pitch_0 = DIV_ROUND_UP(linepixels, 4); + + /* + * The dma memory is write-combined so reads are uncached. + * Speed up by fetching one line at a time. + * + * Also, format conversion from XR24 to gray2 are done + * line-by-line but are converted to 8-bit grayscale as an + * intermediate step. + * + * Allocate a buffer to be used for both copying from the cma + * memory and to store the intermediate grayscale line pixels. + */ + src32 = drm_format_conv_state_reserve(state, len_src32 + linepixels, GFP_KERNEL); + if (!src32) + return; + + gray8 = (u8 *)src32 + len_src32; + + vaddr += clip_offset(clip, fb->pitches[0], cpp); + for (y = 0; y < lines; y++) { + src32 = memcpy(src32, vaddr, len_src32); + drm_fb_xrgb8888_to_gray8_line(gray8, src32, linepixels); + drm_fb_gray8_to_gray2_line(gray2, gray8, linepixels); + vaddr += fb->pitches[0]; + gray2 += dst_pitch_0; + } +} +EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray2); + diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 4a89b6acb6af..8d25cc65707d 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -187,6 +187,7 @@ void drm_gem_private_object_init(struct drm_device *dev, kref_init(&obj->refcount); obj->handle_count = 0; obj->size = size; + mutex_init(&obj->gpuva.lock); dma_resv_init(&obj->_resv); if (!obj->resv) obj->resv = &obj->_resv; @@ -210,6 +211,7 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj) WARN_ON(obj->dma_buf); dma_resv_fini(&obj->_resv); + mutex_destroy(&obj->gpuva.lock); } EXPORT_SYMBOL(drm_gem_private_object_fini); diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 647b49ff2da5..e2a9a6ae1d54 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -975,7 +975,7 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, }; for (i = 0, j = 0; i < npages; j++) { - struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; + struct drm_pagemap_addr *addr = &range->dma_addr[j]; if (addr->proto == DRM_INTERCONNECT_SYSTEM) dma_unmap_page(dev, @@ -1328,7 +1328,7 @@ map_pages: goto err_unmap; } - range->dma_addr[j] = drm_pagemap_device_addr_encode + range->dma_addr[j] = drm_pagemap_addr_encode (addr, DRM_INTERCONNECT_SYSTEM, order, DMA_BIDIRECTIONAL); } diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index d6bea8a4fffd..a52e95555549 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -40,7 +40,7 @@ * mapping's backing &drm_gem_object 
buffers. * * &drm_gem_object buffers maintain a list of &drm_gpuva objects representing - * all existent GPU VA mappings using this &drm_gem_object as backing buffer. + * all existing GPU VA mappings using this &drm_gem_object as backing buffer. * * GPU VAs can be flagged as sparse, such that drivers may use GPU VAs to also * keep track of sparse PTEs in order to support Vulkan 'Sparse Resources'. @@ -72,7 +72,7 @@ * but it can also be a 'dummy' object, which can be allocated with * drm_gpuvm_resv_object_alloc(). * - * In order to connect a struct drm_gpuva its backing &drm_gem_object each + * In order to connect a struct drm_gpuva to its backing &drm_gem_object each * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each * &drm_gpuvm_bo contains a list of &drm_gpuva structures. * @@ -81,7 +81,7 @@ * This is ensured by the API through drm_gpuvm_bo_obtain() and * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this - * particular combination. If not existent a new instance is created and linked + * particular combination. If not present, a new instance is created and linked * to the &drm_gem_object. * * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used @@ -108,7 +108,7 @@ * sequence of operations to satisfy a given map or unmap request. * * Therefore the DRM GPU VA manager provides an algorithm implementing splitting - * and merging of existent GPU VA mappings with the ones that are requested to + * and merging of existing GPU VA mappings with the ones that are requested to * be mapped or unmapped. This feature is required by the Vulkan API to * implement Vulkan 'Sparse Memory Bindings' - drivers UAPIs often refer to this * as VM BIND. @@ -119,7 +119,7 @@ * execute in order to integrate the new mapping cleanly into the current state * of the GPU VA space. * - * Depending on how the new GPU VA mapping intersects with the existent mappings + * Depending on how the new GPU VA mapping intersects with the existing mappings * of the GPU VA space the &drm_gpuvm_ops callbacks contain an arbitrary amount * of unmap operations, a maximum of two remap operations and a single map * operation. The caller might receive no callback at all if no operation is @@ -139,16 +139,16 @@ * one unmap operation and one or two map operations, such that drivers can * derive the page table update delta accordingly. * - * Note that there can't be more than two existent mappings to split up, one at + * Note that there can't be more than two existing mappings to split up, one at * the beginning and one at the end of the new mapping, hence there is a * maximum of two remap operations. * * Analogous to drm_gpuvm_sm_map() drm_gpuvm_sm_unmap() uses &drm_gpuvm_ops to * call back into the driver in order to unmap a range of GPU VA space. The - * logic behind this function is way simpler though: For all existent mappings + * logic behind this function is way simpler though: For all existing mappings * enclosed by the given range unmap operations are created. For mappings which - * are only partically located within the given range, remap operations are - * created such that those mappings are split up and re-mapped partically. + * are only partially located within the given range, remap operations are + * created such that those mappings are split up and re-mapped partially. 
* * As an alternative to drm_gpuvm_sm_map() and drm_gpuvm_sm_unmap(), * drm_gpuvm_sm_map_ops_create() and drm_gpuvm_sm_unmap_ops_create() can be used @@ -168,7 +168,7 @@ * provided helper functions drm_gpuva_map(), drm_gpuva_remap() and * drm_gpuva_unmap() instead. * - * The following diagram depicts the basic relationships of existent GPU VA + * The following diagram depicts the basic relationships of existing GPU VA * mappings, a newly requested mapping and the resulting mappings as implemented * by drm_gpuvm_sm_map() - it doesn't cover any arbitrary combinations of these. * @@ -218,7 +218,7 @@ * * * 4) Existent mapping is a left aligned subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -236,9 +236,9 @@ * and/or non-contiguous BO offset. * * - * 5) Requested mapping's range is a left aligned subset of the existent one, + * 5) Requested mapping's range is a left aligned subset of the existing one, * but backed by a different BO. Hence, map the requested mapping and split - * the existent one adjusting its BO offset. + * the existing one adjusting its BO offset. * * :: * @@ -271,9 +271,9 @@ * new: |-----|-----| (a.bo_offset=n, a'.bo_offset=n+1) * * - * 7) Requested mapping's range is a right aligned subset of the existent one, + * 7) Requested mapping's range is a right aligned subset of the existing one, * but backed by a different BO. Hence, map the requested mapping and split - * the existent one, without adjusting the BO offset. + * the existing one, without adjusting the BO offset. * * :: * @@ -304,7 +304,7 @@ * * 9) Existent mapping is overlapped at the end by the requested mapping backed * by a different BO. Hence, map the requested mapping and split up the - * existent one, without adjusting the BO offset. + * existing one, without adjusting the BO offset. * * :: * @@ -334,9 +334,9 @@ * new: |-----|-----------| (a'.bo_offset=n, a.bo_offset=n+1) * * - * 11) Requested mapping's range is a centered subset of the existent one + * 11) Requested mapping's range is a centered subset of the existing one * having a different backing BO. Hence, map the requested mapping and split - * up the existent one in two mappings, adjusting the BO offset of the right + * up the existing one in two mappings, adjusting the BO offset of the right * one accordingly. * * :: @@ -351,7 +351,7 @@ * new: |-----|-----|-----| (a.bo_offset=n,b.bo_offset=m,a'.bo_offset=n+2) * * - * 12) Requested mapping is a contiguous subset of the existent one. Split it + * 12) Requested mapping is a contiguous subset of the existing one. Split it * up, but indicate that the backing PTEs could be kept. * * :: @@ -367,7 +367,7 @@ * * * 13) Existent mapping is a right aligned subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -386,7 +386,7 @@ * * * 14) Existent mapping is a centered subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -406,7 +406,7 @@ * * 15) Existent mappings is overlapped at the beginning by the requested mapping * backed by a different BO. Hence, map the requested mapping and split up - * the existent one, adjusting its BO offset accordingly. + * the existing one, adjusting its BO offset accordingly. 
* * :: * @@ -497,8 +497,7 @@ * DRM GPUVM also does not take care of the locking of the backing * &drm_gem_object buffers GPU VA lists and &drm_gpuvm_bo abstractions by * itself; drivers are responsible to enforce mutual exclusion using either the - * GEMs dma_resv lock or alternatively a driver specific external lock. For the - * latter see also drm_gem_gpuva_set_lock(). + * GEMs dma_resv lock or the GEMs gpuva.lock mutex. * * However, DRM GPUVM contains lockdep checks to ensure callers of its API hold * the corresponding lock whenever the &drm_gem_objects GPU VA list is accessed @@ -534,8 +533,8 @@ * make use of them. * * The below code is strictly limited to illustrate the generic usage pattern. - * To maintain simplicitly, it doesn't make use of any abstractions for common - * code, different (asyncronous) stages with fence signalling critical paths, + * To maintain simplicity, it doesn't make use of any abstractions for common + * code, different (asynchronous) stages with fence signalling critical paths, * any other helpers or error handling in terms of freeing memory and dropping * previously taken locks. * @@ -544,7 +543,7 @@ * // Allocates a new &drm_gpuva. * struct drm_gpuva * driver_gpuva_alloc(void); * - * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva + * // Typically drivers would embed the &drm_gpuvm and &drm_gpuva * // structure in individual driver structures and lock the dma-resv with * // drm_exec or similar helpers. * int driver_mapping_create(struct drm_gpuvm *gpuvm, @@ -652,7 +651,7 @@ * .sm_step_unmap = driver_gpuva_unmap, * }; * - * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva + * // Typically drivers would embed the &drm_gpuvm and &drm_gpuva * // structure in individual driver structures and lock the dma-resv with * // drm_exec or similar helpers. * int driver_mapping_create(struct drm_gpuvm *gpuvm, @@ -750,7 +749,7 @@ * * This helper is here to provide lockless list iteration. Lockless as in, the * iterator releases the lock immediately after picking the first element from - * the list, so list insertion deletion can happen concurrently. + * the list, so list insertion and deletion can happen concurrently. * * Elements popped from the original list are kept in a local list, so removal * and is_empty checks can still happen while we're iterating the list. @@ -1230,7 +1229,7 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, } /** - * drm_gpuvm_prepare_objects() - prepare all assoiciated BOs + * drm_gpuvm_prepare_objects() - prepare all associated BOs * @gpuvm: the &drm_gpuvm * @exec: the &drm_exec locking context * @num_fences: the amount of &dma_fences to reserve @@ -1300,13 +1299,13 @@ drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range); /** - * drm_gpuvm_exec_lock() - lock all dma-resv of all assoiciated BOs + * drm_gpuvm_exec_lock() - lock all dma-resv of all associated BOs * @vm_exec: the &drm_gpuvm_exec wrapper * * Acquires all dma-resv locks of all &drm_gem_objects the given * &drm_gpuvm contains mappings of. * - * Addionally, when calling this function with struct drm_gpuvm_exec::extra + * Additionally, when calling this function with struct drm_gpuvm_exec::extra * being set the driver receives the given @fn callback to lock additional * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers * would call drm_exec_prepare_obj() from within this callback. 
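As a rough illustration of the &drm_gpuvm_exec wrapper and the optional ->extra callback described above, a driver might lock all of a VM's backing buffers plus one job-specific object like this. struct driver_job, its extra_obj member and driver_lock_extra() are invented for the example; the drm_gpuvm and drm_exec calls are the documented API:

#include <drm/drm_exec.h>
#include <drm/drm_gpuvm.h>

struct driver_job {
        struct drm_gem_object *extra_obj;       /* hypothetical job layout */
};

/* Locks one additional GEM object in the same drm_exec context. */
static int driver_lock_extra(struct drm_gpuvm_exec *vm_exec)
{
        struct driver_job *job = vm_exec->extra.priv;

        return drm_exec_prepare_obj(&vm_exec->exec, job->extra_obj, 1);
}

static int driver_lock_and_submit(struct drm_gpuvm *gpuvm,
                                  struct driver_job *job)
{
        struct drm_gpuvm_exec vm_exec = {
                .vm = gpuvm,
                .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
                .num_fences = 1,
                .extra.fn = driver_lock_extra,
                .extra.priv = job,
        };
        int ret;

        ret = drm_gpuvm_exec_lock(&vm_exec);
        if (ret)
                return ret;

        /* ... validate buffers, submit the job, add fences ... */

        drm_gpuvm_exec_unlock(&vm_exec);
        return 0;
}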
@@ -1363,7 +1362,7 @@ fn_lock_array(struct drm_gpuvm_exec *vm_exec) } /** - * drm_gpuvm_exec_lock_array() - lock all dma-resv of all assoiciated BOs + * drm_gpuvm_exec_lock_array() - lock all dma-resv of all associated BOs * @vm_exec: the &drm_gpuvm_exec wrapper * @objs: additional &drm_gem_objects to lock * @num_objs: the number of additional &drm_gem_objects to lock @@ -1582,7 +1581,7 @@ drm_gpuvm_bo_destroy(struct kref *kref) drm_gpuvm_bo_list_del(vm_bo, extobj, lock); drm_gpuvm_bo_list_del(vm_bo, evict, lock); - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_del(&vm_bo->list.entry.gem); if (ops && ops->vm_bo_free) @@ -1603,7 +1602,8 @@ drm_gpuvm_bo_destroy(struct kref *kref) * If the reference count drops to zero, the &gpuvm_bo is destroyed, which * includes removing it from the GEMs gpuva list. Hence, if a call to this * function can potentially let the reference count drop to zero the caller must - * hold the dma-resv or driver specific GEM gpuva lock. + * hold the lock that the GEM uses for its gpuva list (either the GEM's + * dma-resv or gpuva.lock mutex). * * This function may only be called from non-atomic context. * @@ -1627,7 +1627,7 @@ __drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, { struct drm_gpuvm_bo *vm_bo; - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); drm_gem_for_each_gpuvm_bo(vm_bo, obj) if (vm_bo->vm == gpuvm) return vm_bo; @@ -1658,7 +1658,7 @@ drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find); /** - * drm_gpuvm_bo_obtain() - obtains and instance of the &drm_gpuvm_bo for the + * drm_gpuvm_bo_obtain() - obtains an instance of the &drm_gpuvm_bo for the * given &drm_gpuvm and &drm_gem_object * @gpuvm: The &drm_gpuvm the @obj is mapped in. * @obj: The &drm_gem_object being mapped in the @gpuvm. @@ -1686,7 +1686,7 @@ drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, if (!vm_bo) return ERR_PTR(-ENOMEM); - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_add_tail(&vm_bo->list.entry.gem, &obj->gpuva.list); return vm_bo; @@ -1694,7 +1694,7 @@ drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); /** - * drm_gpuvm_bo_obtain_prealloc() - obtains and instance of the &drm_gpuvm_bo + * drm_gpuvm_bo_obtain_prealloc() - obtains an instance of the &drm_gpuvm_bo * for the given &drm_gpuvm and &drm_gem_object * @__vm_bo: A pre-allocated struct drm_gpuvm_bo. * @@ -1722,7 +1722,7 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo) return vm_bo; } - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); return __vm_bo; @@ -1758,7 +1758,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add); * @vm_bo: the &drm_gpuvm_bo to add or remove * @evict: indicates whether the object is evicted * - * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvms evicted list. + * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvm's evicted list. */ void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict) @@ -1860,7 +1860,7 @@ __drm_gpuva_remove(struct drm_gpuva *va) * drm_gpuva_remove() - remove a &drm_gpuva * @va: the &drm_gpuva to remove * - * This removes the given &va from the underlaying tree. + * This removes the given &va from the underlying tree. 
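A minimal sketch of obtaining a &drm_gpuvm_bo and linking a freshly inserted mapping to it, assuming the VM serializes the GEM's gpuva list with the dma-resv lock. driver_alloc_va() is a hypothetical helper that fills in the &drm_gpuva (va.addr, va.range, gem.obj, gem.offset), and freeing the va on error is left out:

#include <drm/drm_gpuvm.h>
#include <linux/dma-resv.h>
#include <linux/err.h>

/* Hypothetical allocation helper. */
struct drm_gpuva *driver_alloc_va(u64 addr, u64 range,
                                  struct drm_gem_object *obj, u64 offset);

static int driver_map_bo(struct drm_gpuvm *gpuvm, struct drm_gem_object *obj,
                         u64 addr, u64 range, u64 offset)
{
        struct drm_gpuvm_bo *vm_bo;
        struct drm_gpuva *va;
        int ret;

        va = driver_alloc_va(addr, range, obj, offset);
        if (!va)
                return -ENOMEM;

        ret = dma_resv_lock(obj->resv, NULL);
        if (ret)
                return ret;

        /* Get (or create) the connection between this VM and this GEM. */
        vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj);
        if (IS_ERR(vm_bo)) {
                ret = PTR_ERR(vm_bo);
                goto out_unlock;
        }

        ret = drm_gpuva_insert(gpuvm, va);
        if (ret)
                goto out_put;

        drm_gpuva_link(va, vm_bo);      /* takes its own vm_bo reference */

out_put:
        drm_gpuvm_bo_put(vm_bo);        /* drop the obtain() reference */
out_unlock:
        dma_resv_unlock(obj->resv);
        return ret;
}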
* * It is safe to use this function using the safe versions of iterating the GPU * VA space, such as drm_gpuvm_for_each_va_safe() and @@ -1894,8 +1894,7 @@ EXPORT_SYMBOL_GPL(drm_gpuva_remove); * reference of the latter is taken. * * This function expects the caller to protect the GEM's GPUVA list against - * concurrent access using either the GEMs dma_resv lock or a driver specific - * lock set through drm_gem_gpuva_set_lock(). + * concurrent access using either the GEM's dma-resv or gpuva.lock mutex. */ void drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo) @@ -1910,7 +1909,7 @@ drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo) va->vm_bo = drm_gpuvm_bo_get(vm_bo); - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_add_tail(&va->gem.entry, &vm_bo->list.gpuva); } EXPORT_SYMBOL_GPL(drm_gpuva_link); @@ -1930,8 +1929,7 @@ EXPORT_SYMBOL_GPL(drm_gpuva_link); * the latter is dropped. * * This function expects the caller to protect the GEM's GPUVA list against - * concurrent access using either the GEMs dma_resv lock or a driver specific - * lock set through drm_gem_gpuva_set_lock(). + * concurrent access using either the GEM's dma-resv or gpuva.lock mutex. */ void drm_gpuva_unlink(struct drm_gpuva *va) @@ -1942,7 +1940,7 @@ drm_gpuva_unlink(struct drm_gpuva *va) if (unlikely(!obj)) return; - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(va->vm, obj); list_del_init(&va->gem.entry); va->vm_bo = NULL; @@ -2464,7 +2462,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map); * * This function iterates the given range of the GPU VA space. It utilizes the * &drm_gpuvm_ops to call back into the driver providing the operations to - * unmap and, if required, split existent mappings. + * unmap and, if required, split existing mappings. * * Drivers may use these callbacks to update the GPU VA space right away within * the callback. In case the driver decides to copy and store the operations for @@ -2533,7 +2531,9 @@ static const struct drm_gpuvm_ops lock_ops = { * remapped, and locks+prepares (drm_exec_prepare_object()) objects that * will be newly mapped. * - * The expected usage is: + * The expected usage is:: + * + * .. code-block:: c * * vm_bind { * struct drm_exec exec; @@ -2574,7 +2574,7 @@ static const struct drm_gpuvm_ops lock_ops = { * required without the earlier DRIVER_OP_MAP. This is safe because we've * already locked the GEM object in the earlier DRIVER_OP_MAP step. * - * Returns: 0 on success or a negative error codec + * Returns: 0 on success or a negative error code */ int drm_gpuvm_sm_map_exec_lock(struct drm_gpuvm *gpuvm, @@ -2746,12 +2746,12 @@ err_free_ops: * @req: map request arguments * * This function creates a list of operations to perform splitting and merging - * of existent mapping(s) with the newly requested one. + * of existing mapping(s) with the newly requested one. * * The list can be iterated with &drm_gpuva_for_each_op and must be processed * in the given order. It can contain map, unmap and remap operations, but it * also can be empty if no operation is required, e.g. if the requested mapping - * already exists is the exact same way. + * already exists in the exact same way. * * There can be an arbitrary amount of unmap operations, a maximum of two remap * operations and a single map operation. 
The latter one represents the original @@ -2943,8 +2943,8 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); * After the caller finished processing the returned &drm_gpuva_ops, they must * be freed with &drm_gpuva_ops_free. * - * It is the callers responsibility to protect the GEMs GPUVA list against - * concurrent access using the GEMs dma_resv lock. + * This function expects the caller to protect the GEM's GPUVA list against + * concurrent access using either the GEM's dma-resv or gpuva.lock mutex. * * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure */ @@ -2956,7 +2956,7 @@ drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo) struct drm_gpuva *va; int ret; - drm_gem_gpuva_assert_lock_held(vm_bo->obj); + drm_gem_gpuva_assert_lock_held(vm_bo->vm, vm_bo->obj); ops = kzalloc(sizeof(*ops), GFP_KERNEL); if (!ops) diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c index d0183dea7703..4f65ce729a47 100644 --- a/drivers/gpu/drm/drm_of.c +++ b/drivers/gpu/drm/drm_of.c @@ -55,7 +55,8 @@ EXPORT_SYMBOL(drm_of_crtc_port_mask); * and generate the DRM mask of CRTCs which may be attached to this * encoder. * - * See Documentation/devicetree/bindings/graph.txt for the bindings. + * See https://github.com/devicetree-org/dt-schema/blob/main/dtschema/schemas/graph.yaml + * for the bindings. */ uint32_t drm_of_find_possible_crtcs(struct drm_device *dev, struct device_node *port) @@ -106,7 +107,9 @@ EXPORT_SYMBOL_GPL(drm_of_component_match_add); * Parse the platform device OF node and bind all the components associated * with the master. Interface ports are added before the encoders in order to * satisfy their .bind requirements - * See Documentation/devicetree/bindings/graph.txt for the bindings. + * + * See https://github.com/devicetree-org/dt-schema/blob/main/dtschema/schemas/graph.yaml + * for the bindings. * * Returns zero if successful, or one of the standard error codes if it fails. */ diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 1da55322af12..22c44807e3fe 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -202,7 +202,7 @@ static void drm_pagemap_get_devmem_page(struct page *page, /** * drm_pagemap_migrate_map_pages() - Map migration pages for GPU SVM migration * @dev: The device for which the pages are being mapped - * @dma_addr: Array to store DMA addresses corresponding to mapped pages + * @pagemap_addr: Array to store DMA information corresponding to mapped pages * @migrate_pfn: Array of migrate page frame numbers to map * @npages: Number of pages to map * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) @@ -215,25 +215,39 @@ static void drm_pagemap_get_devmem_page(struct page *page, * Returns: 0 on success, -EFAULT if an error occurs during mapping. 
*/ static int drm_pagemap_migrate_map_pages(struct device *dev, - dma_addr_t *dma_addr, + struct drm_pagemap_addr *pagemap_addr, unsigned long *migrate_pfn, unsigned long npages, enum dma_data_direction dir) { unsigned long i; - for (i = 0; i < npages; ++i) { + for (i = 0; i < npages;) { struct page *page = migrate_pfn_to_page(migrate_pfn[i]); + dma_addr_t dma_addr; + struct folio *folio; + unsigned int order = 0; if (!page) - continue; + goto next; if (WARN_ON_ONCE(is_zone_device_page(page))) return -EFAULT; - dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); - if (dma_mapping_error(dev, dma_addr[i])) + folio = page_folio(page); + order = folio_order(folio); + + dma_addr = dma_map_page(dev, page, 0, page_size(page), dir); + if (dma_mapping_error(dev, dma_addr)) return -EFAULT; + + pagemap_addr[i] = + drm_pagemap_addr_encode(dma_addr, + DRM_INTERCONNECT_SYSTEM, + order, dir); + +next: + i += NR_PAGES(order); } return 0; @@ -242,7 +256,7 @@ static int drm_pagemap_migrate_map_pages(struct device *dev, /** * drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration * @dev: The device for which the pages were mapped - * @dma_addr: Array of DMA addresses corresponding to mapped pages + * @pagemap_addr: Array of DMA information corresponding to mapped pages * @npages: Number of pages to unmap * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) * @@ -251,17 +265,20 @@ static int drm_pagemap_migrate_map_pages(struct device *dev, * if it's valid and not already unmapped, and unmaps the corresponding page. */ static void drm_pagemap_migrate_unmap_pages(struct device *dev, - dma_addr_t *dma_addr, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages, enum dma_data_direction dir) { unsigned long i; - for (i = 0; i < npages; ++i) { - if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) - continue; + for (i = 0; i < npages;) { + if (!pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr)) + goto next; - dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); + dma_unmap_page(dev, pagemap_addr[i].addr, PAGE_SIZE << pagemap_addr[i].order, dir); + +next: + i += NR_PAGES(pagemap_addr[i].order); } } @@ -314,7 +331,7 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, struct vm_area_struct *vas; struct drm_pagemap_zdd *zdd = NULL; struct page **pages; - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; void *buf; int err; @@ -340,14 +357,14 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, goto err_out; } - buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + + buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) + sizeof(*pages), GFP_KERNEL); if (!buf) { err = -ENOMEM; goto err_out; } - dma_addr = buf + (2 * sizeof(*migrate.src) * npages); - pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; + pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages); + pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages; zdd = drm_pagemap_zdd_alloc(pgmap_owner); if (!zdd) { @@ -377,8 +394,9 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, if (err) goto err_finalize; - err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, dma_addr, + err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, pagemap_addr, migrate.src, npages, DMA_TO_DEVICE); + if (err) goto err_finalize; @@ -390,7 +408,7 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem 
*devmem_allocation, drm_pagemap_get_devmem_page(page, zdd); } - err = ops->copy_to_devmem(pages, dma_addr, npages); + err = ops->copy_to_devmem(pages, pagemap_addr, npages); if (err) goto err_finalize; @@ -404,7 +422,7 @@ err_finalize: drm_pagemap_migration_unlock_put_pages(npages, migrate.dst); migrate_vma_pages(&migrate); migrate_vma_finalize(&migrate); - drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, + drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, npages, DMA_TO_DEVICE); err_free: if (zdd) @@ -442,54 +460,80 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas, { unsigned long i; - for (i = 0; i < npages; ++i, addr += PAGE_SIZE) { - struct page *page, *src_page; + for (i = 0; i < npages;) { + struct page *page = NULL, *src_page; + struct folio *folio; + unsigned int order = 0; if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE)) - continue; + goto next; src_page = migrate_pfn_to_page(src_mpfn[i]); if (!src_page) - continue; + goto next; if (fault_page) { if (src_page->zone_device_data != fault_page->zone_device_data) - continue; + goto next; } + order = folio_order(page_folio(src_page)); + + /* TODO: Support fallback to single pages if THP allocation fails */ if (vas) - page = alloc_page_vma(GFP_HIGHUSER, vas, addr); + folio = vma_alloc_folio(GFP_HIGHUSER, order, vas, addr); else - page = alloc_page(GFP_HIGHUSER); + folio = folio_alloc(GFP_HIGHUSER, order); - if (!page) + if (!folio) goto free_pages; + page = folio_page(folio, 0); mpfn[i] = migrate_pfn(page_to_pfn(page)); + +next: + if (page) + addr += page_size(page); + else + addr += PAGE_SIZE; + + i += NR_PAGES(order); } - for (i = 0; i < npages; ++i) { + for (i = 0; i < npages;) { struct page *page = migrate_pfn_to_page(mpfn[i]); + unsigned int order = 0; if (!page) - continue; + goto next_lock; - WARN_ON_ONCE(!trylock_page(page)); - ++*mpages; + WARN_ON_ONCE(!folio_trylock(page_folio(page))); + + order = folio_order(page_folio(page)); + *mpages += NR_PAGES(order); + +next_lock: + i += NR_PAGES(order); } return 0; free_pages: - for (i = 0; i < npages; ++i) { + for (i = 0; i < npages;) { struct page *page = migrate_pfn_to_page(mpfn[i]); + unsigned int order = 0; if (!page) - continue; + goto next_put; put_page(page); mpfn[i] = 0; + + order = folio_order(page_folio(page)); + +next_put: + i += NR_PAGES(order); } return -ENOMEM; } @@ -509,7 +553,7 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation) unsigned long npages, mpages = 0; struct page **pages; unsigned long *src, *dst; - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; void *buf; int i, err = 0; unsigned int retry_count = 2; @@ -520,7 +564,7 @@ retry: if (!mmget_not_zero(devmem_allocation->mm)) return -EFAULT; - buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) + + buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*pagemap_addr) + sizeof(*pages), GFP_KERNEL); if (!buf) { err = -ENOMEM; @@ -528,8 +572,8 @@ retry: } src = buf; dst = buf + (sizeof(*src) * npages); - dma_addr = buf + (2 * sizeof(*src) * npages); - pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages; + pagemap_addr = buf + (2 * sizeof(*src) * npages); + pages = buf + (2 * sizeof(*src) + sizeof(*pagemap_addr)) * npages; err = ops->populate_devmem_pfn(devmem_allocation, npages, src); if (err) @@ -544,7 +588,7 @@ retry: if (err || !mpages) goto err_finalize; - err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, dma_addr, + err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, 
pagemap_addr, dst, npages, DMA_FROM_DEVICE); if (err) goto err_finalize; @@ -552,7 +596,7 @@ retry: for (i = 0; i < npages; ++i) pages[i] = migrate_pfn_to_page(src[i]); - err = ops->copy_to_ram(pages, dma_addr, npages); + err = ops->copy_to_ram(pages, pagemap_addr, npages); if (err) goto err_finalize; @@ -561,7 +605,7 @@ err_finalize: drm_pagemap_migration_unlock_put_pages(npages, dst); migrate_device_pages(src, dst, npages); migrate_device_finalize(src, dst, npages); - drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, + drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, npages, DMA_FROM_DEVICE); err_free: kvfree(buf); @@ -612,7 +656,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, struct device *dev = NULL; unsigned long npages, mpages = 0; struct page **pages; - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; unsigned long start, end; void *buf; int i, err = 0; @@ -637,14 +681,14 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, migrate.end = end; npages = npages_in_range(start, end); - buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + + buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) + sizeof(*pages), GFP_KERNEL); if (!buf) { err = -ENOMEM; goto err_out; } - dma_addr = buf + (2 * sizeof(*migrate.src) * npages); - pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; + pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages); + pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages; migrate.vma = vas; migrate.src = buf; @@ -680,7 +724,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, if (err) goto err_finalize; - err = drm_pagemap_migrate_map_pages(dev, dma_addr, migrate.dst, npages, + err = drm_pagemap_migrate_map_pages(dev, pagemap_addr, migrate.dst, npages, DMA_FROM_DEVICE); if (err) goto err_finalize; @@ -688,7 +732,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, for (i = 0; i < npages; ++i) pages[i] = migrate_pfn_to_page(migrate.src[i]); - err = ops->copy_to_ram(pages, dma_addr, npages); + err = ops->copy_to_ram(pages, pagemap_addr, npages); if (err) goto err_finalize; @@ -698,7 +742,7 @@ err_finalize: migrate_vma_pages(&migrate); migrate_vma_finalize(&migrate); if (dev) - drm_pagemap_migrate_unmap_pages(dev, dma_addr, npages, + drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, npages, DMA_FROM_DEVICE); err_free: kvfree(buf); diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index c8bb28dccdc1..d1e6598ea3bc 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -134,6 +134,9 @@ void drm_panel_prepare(struct drm_panel *panel) panel->prepared = true; list_for_each_entry(follower, &panel->followers, list) { + if (!follower->funcs->panel_prepared) + continue; + ret = follower->funcs->panel_prepared(follower); if (ret < 0) dev_info(panel->dev, "%ps failed: %d\n", @@ -179,6 +182,9 @@ void drm_panel_unprepare(struct drm_panel *panel) mutex_lock(&panel->follower_lock); list_for_each_entry(follower, &panel->followers, list) { + if (!follower->funcs->panel_unpreparing) + continue; + ret = follower->funcs->panel_unpreparing(follower); if (ret < 0) dev_info(panel->dev, "%ps failed: %d\n", @@ -209,6 +215,7 @@ EXPORT_SYMBOL(drm_panel_unprepare); */ void drm_panel_enable(struct drm_panel *panel) { + struct drm_panel_follower *follower; int ret; if (!panel) @@ -219,10 +226,12 @@ void drm_panel_enable(struct drm_panel *panel) 
return; } + mutex_lock(&panel->follower_lock); + if (panel->funcs && panel->funcs->enable) { ret = panel->funcs->enable(panel); if (ret < 0) - return; + goto exit; } panel->enabled = true; @@ -230,6 +239,19 @@ void drm_panel_enable(struct drm_panel *panel) if (ret < 0) DRM_DEV_INFO(panel->dev, "failed to enable backlight: %d\n", ret); + + list_for_each_entry(follower, &panel->followers, list) { + if (!follower->funcs->panel_enabled) + continue; + + ret = follower->funcs->panel_enabled(follower); + if (ret < 0) + dev_info(panel->dev, "%ps failed: %d\n", + follower->funcs->panel_enabled, ret); + } + +exit: + mutex_unlock(&panel->follower_lock); } EXPORT_SYMBOL(drm_panel_enable); @@ -243,6 +265,7 @@ EXPORT_SYMBOL(drm_panel_enable); */ void drm_panel_disable(struct drm_panel *panel) { + struct drm_panel_follower *follower; int ret; if (!panel) @@ -262,6 +285,18 @@ void drm_panel_disable(struct drm_panel *panel) return; } + mutex_lock(&panel->follower_lock); + + list_for_each_entry(follower, &panel->followers, list) { + if (!follower->funcs->panel_disabling) + continue; + + ret = follower->funcs->panel_disabling(follower); + if (ret < 0) + dev_info(panel->dev, "%ps failed: %d\n", + follower->funcs->panel_disabling, ret); + } + ret = backlight_disable(panel->backlight); if (ret < 0) DRM_DEV_INFO(panel->dev, "failed to disable backlight: %d\n", @@ -270,9 +305,12 @@ void drm_panel_disable(struct drm_panel *panel) if (panel->funcs && panel->funcs->disable) { ret = panel->funcs->disable(panel); if (ret < 0) - return; + goto exit; } panel->enabled = false; + +exit: + mutex_unlock(&panel->follower_lock); } EXPORT_SYMBOL(drm_panel_disable); @@ -539,13 +577,13 @@ EXPORT_SYMBOL(drm_is_panel_follower); * @follower_dev: The 'struct device' for the follower. * @follower: The panel follower descriptor for the follower. * - * A panel follower is called right after preparing the panel and right before - * unpreparing the panel. It's primary intention is to power on an associated - * touchscreen, though it could be used for any similar devices. Multiple - * devices are allowed the follow the same panel. + * A panel follower is called right after preparing/enabling the panel and right + * before unpreparing/disabling the panel. It's primary intention is to power on + * an associated touchscreen, though it could be used for any similar devices. + * Multiple devices are allowed the follow the same panel. * - * If a follower is added to a panel that's already been turned on, the - * follower's prepare callback is called right away. + * If a follower is added to a panel that's already been prepared/enabled, the + * follower's prepared/enabled callback is called right away. * * The "panel" property of the follower points to the panel to be followed. * @@ -569,12 +607,18 @@ int drm_panel_add_follower(struct device *follower_dev, mutex_lock(&panel->follower_lock); list_add_tail(&follower->list, &panel->followers); - if (panel->prepared) { + if (panel->prepared && follower->funcs->panel_prepared) { ret = follower->funcs->panel_prepared(follower); if (ret < 0) dev_info(panel->dev, "%ps failed: %d\n", follower->funcs->panel_prepared, ret); } + if (panel->enabled && follower->funcs->panel_enabled) { + ret = follower->funcs->panel_enabled(follower); + if (ret < 0) + dev_info(panel->dev, "%ps failed: %d\n", + follower->funcs->panel_enabled, ret); + } mutex_unlock(&panel->follower_lock); @@ -587,7 +631,8 @@ EXPORT_SYMBOL(drm_panel_add_follower); * @follower: The panel follower descriptor for the follower. 
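To make the new hooks concrete, a follower driver built on this change might look roughly like the sketch below. struct my_touchscreen and the registration glue are invented for illustration; panel_prepared/panel_unpreparing are simply left unset, which the updated core now tolerates:

#include <drm/drm_panel.h>
#include <linux/container_of.h>
#include <linux/device.h>

struct my_touchscreen {                 /* hypothetical follower device */
        struct device *dev;
        struct drm_panel_follower follower;
};

static int my_ts_panel_enabled(struct drm_panel_follower *follower)
{
        struct my_touchscreen *ts =
                container_of(follower, struct my_touchscreen, follower);

        /* A real driver would start scanning / unmask IRQs here. */
        dev_dbg(ts->dev, "panel enabled\n");
        return 0;
}

static int my_ts_panel_disabling(struct drm_panel_follower *follower)
{
        struct my_touchscreen *ts =
                container_of(follower, struct my_touchscreen, follower);

        /* A real driver would stop scanning before the panel goes dark. */
        dev_dbg(ts->dev, "panel disabling\n");
        return 0;
}

static const struct drm_panel_follower_funcs my_ts_follower_funcs = {
        .panel_enabled = my_ts_panel_enabled,
        .panel_disabling = my_ts_panel_disabling,
};

static int my_ts_register_follower(struct my_touchscreen *ts)
{
        ts->follower.funcs = &my_ts_follower_funcs;

        /* Resolves the "panel" firmware property and attaches to it. */
        return drm_panel_add_follower(ts->dev, &ts->follower);
}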
* * Undo drm_panel_add_follower(). This includes calling the follower's - * unprepare function if we're removed from a panel that's currently prepared. + * unpreparing/disabling function if we're removed from a panel that's currently + * prepared/enabled. * * Return: 0 or an error code. */ @@ -598,7 +643,13 @@ void drm_panel_remove_follower(struct drm_panel_follower *follower) mutex_lock(&panel->follower_lock); - if (panel->prepared) { + if (panel->enabled && follower->funcs->panel_disabling) { + ret = follower->funcs->panel_disabling(follower); + if (ret < 0) + dev_info(panel->dev, "%ps failed: %d\n", + follower->funcs->panel_disabling, ret); + } + if (panel->prepared && follower->funcs->panel_unpreparing) { ret = follower->funcs->panel_unpreparing(follower); if (ret < 0) dev_info(panel->dev, "%ps failed: %d\n", diff --git a/drivers/gpu/drm/drm_panel_backlight_quirks.c b/drivers/gpu/drm/drm_panel_backlight_quirks.c index 598f812b7cb3..537dc6dd0534 100644 --- a/drivers/gpu/drm/drm_panel_backlight_quirks.c +++ b/drivers/gpu/drm/drm_panel_backlight_quirks.c @@ -8,23 +8,26 @@ #include <drm/drm_edid.h> #include <drm/drm_utils.h> -struct drm_panel_min_backlight_quirk { - struct { - enum dmi_field field; - const char * const value; - } dmi_match; +struct drm_panel_match { + enum dmi_field field; + const char * const value; +}; + +struct drm_get_panel_backlight_quirk { + struct drm_panel_match dmi_match; + struct drm_panel_match dmi_match_other; struct drm_edid_ident ident; - u8 min_brightness; + struct drm_panel_backlight_quirk quirk; }; -static const struct drm_panel_min_backlight_quirk drm_panel_min_backlight_quirks[] = { +static const struct drm_get_panel_backlight_quirk drm_panel_min_backlight_quirks[] = { /* 13 inch matte panel */ { .dmi_match.field = DMI_BOARD_VENDOR, .dmi_match.value = "Framework", .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x0bca), .ident.name = "NE135FBM-N41", - .min_brightness = 0, + .quirk = { .min_brightness = 1, }, }, /* 13 inch glossy panel */ { @@ -32,7 +35,7 @@ static const struct drm_panel_min_backlight_quirk drm_panel_min_backlight_quirks .dmi_match.value = "Framework", .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x095f), .ident.name = "NE135FBM-N41", - .min_brightness = 0, + .quirk = { .min_brightness = 1, }, }, /* 13 inch 2.8k panel */ { @@ -40,56 +43,114 @@ static const struct drm_panel_min_backlight_quirk drm_panel_min_backlight_quirks .dmi_match.value = "Framework", .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x0cb4), .ident.name = "NE135A1M-NY1", - .min_brightness = 0, + .quirk = { .min_brightness = 1, }, + }, + /* Steam Deck models */ + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "Valve", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "Jupiter", + .quirk = { .min_brightness = 1, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "Valve", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "Galileo", + .quirk = { .min_brightness = 1, }, + }, + /* Have OLED Panels with brightness issue when last byte is 0/1 */ + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "AYANEO", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "AYANEO 3", + .quirk = { .brightness_mask = 3, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "ZOTAC", + .dmi_match_other.field = DMI_BOARD_NAME, + .dmi_match_other.value = "G0A1W", + .quirk = { .brightness_mask = 3, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, 
+ .dmi_match.value = "ZOTAC", + .dmi_match_other.field = DMI_BOARD_NAME, + .dmi_match_other.value = "G1A1W", + .quirk = { .brightness_mask = 3, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "ONE-NETBOOK", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "ONEXPLAYER F1Pro", + .quirk = { .brightness_mask = 3, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "ONE-NETBOOK", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "ONEXPLAYER F1 EVA-02", + .quirk = { .brightness_mask = 3, }, }, }; -static bool drm_panel_min_backlight_quirk_matches(const struct drm_panel_min_backlight_quirk *quirk, - const struct drm_edid *edid) +static bool drm_panel_min_backlight_quirk_matches( + const struct drm_get_panel_backlight_quirk *quirk, + const struct drm_edid *edid) { - if (!dmi_match(quirk->dmi_match.field, quirk->dmi_match.value)) + if (quirk->dmi_match.field && + !dmi_match(quirk->dmi_match.field, quirk->dmi_match.value)) + return false; + + if (quirk->dmi_match_other.field && + !dmi_match(quirk->dmi_match_other.field, + quirk->dmi_match_other.value)) return false; - if (!drm_edid_match(edid, &quirk->ident)) + if (quirk->ident.panel_id && !drm_edid_match(edid, &quirk->ident)) return false; return true; } /** - * drm_get_panel_min_brightness_quirk - Get minimum supported brightness level for a panel. + * drm_get_panel_backlight_quirk - Get backlight quirks for a panel * @edid: EDID of the panel to check * * This function checks for platform specific (e.g. DMI based) quirks * providing info on the minimum backlight brightness for systems where this - * cannot be probed correctly from the hard-/firm-ware. + * cannot be probed correctly from the hard-/firm-ware and other sources. * * Returns: - * A negative error value or - * an override value in the range [0, 255] representing 0-100% to be scaled to - * the drivers target range. + * a drm_panel_backlight_quirk struct if a quirk was found, otherwise an + * error pointer. */ -int drm_get_panel_min_brightness_quirk(const struct drm_edid *edid) +const struct drm_panel_backlight_quirk * +drm_get_panel_backlight_quirk(const struct drm_edid *edid) { - const struct drm_panel_min_backlight_quirk *quirk; + const struct drm_get_panel_backlight_quirk *quirk; size_t i; if (!IS_ENABLED(CONFIG_DMI)) - return -ENODATA; + return ERR_PTR(-ENODATA); if (!edid) - return -EINVAL; + return ERR_PTR(-EINVAL); for (i = 0; i < ARRAY_SIZE(drm_panel_min_backlight_quirks); i++) { quirk = &drm_panel_min_backlight_quirks[i]; if (drm_panel_min_backlight_quirk_matches(quirk, edid)) - return quirk->min_brightness; + return &quirk->quirk; } - return -ENODATA; + return ERR_PTR(-ENODATA); } -EXPORT_SYMBOL(drm_get_panel_min_brightness_quirk); +EXPORT_SYMBOL(drm_get_panel_backlight_quirk); MODULE_DESCRIPTION("Quirks for panel backlight overrides"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 09a9b452e8b7..50c286c5cee8 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -381,6 +381,26 @@ struct DecFifo { len: usize, } +// On arm32 architecture, dividing an `u64` by a constant will generate a call +// to `__aeabi_uldivmod` which is not present in the kernel. +// So use the multiply by inverse method for this architecture. 
+fn div10(val: u64) -> u64 { + if cfg!(target_arch = "arm") { + let val_h = val >> 32; + let val_l = val & 0xFFFFFFFF; + let b_h: u64 = 0x66666666; + let b_l: u64 = 0x66666667; + + let tmp1 = val_h * b_l + ((val_l * b_l) >> 32); + let tmp2 = val_l * b_h + (tmp1 & 0xffffffff); + let tmp3 = val_h * b_h + (tmp1 >> 32) + (tmp2 >> 32); + + tmp3 >> 2 + } else { + val / 10 + } +} + impl DecFifo { fn push(&mut self, data: u64, len: usize) { let mut chunk = data; @@ -389,7 +409,7 @@ impl DecFifo { } for i in 0..len { self.decimals[i] = (chunk % 10) as u8; - chunk /= 10; + chunk = div10(chunk); } self.len += len; } diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index a455c56dbbeb..b01ffa4d6509 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -18,6 +18,7 @@ #include <linux/gfp.h> #include <linux/i2c.h> #include <linux/kdev_t.h> +#include <linux/pci.h> #include <linux/property.h> #include <linux/slab.h> @@ -30,6 +31,8 @@ #include <drm/drm_property.h> #include <drm/drm_sysfs.h> +#include <asm/video.h> + #include "drm_internal.h" #include "drm_crtc_internal.h" @@ -508,6 +511,43 @@ void drm_sysfs_connector_property_event(struct drm_connector *connector, } EXPORT_SYMBOL(drm_sysfs_connector_property_event); +static ssize_t boot_display_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "1\n"); +} +static DEVICE_ATTR_RO(boot_display); + +static struct attribute *display_attrs[] = { + &dev_attr_boot_display.attr, + NULL +}; + +static umode_t boot_display_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj)->parent; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if (video_is_primary_device(&pdev->dev)) + return a->mode; + } + + return 0; +} + +static const struct attribute_group display_attr_group = { + .attrs = display_attrs, + .is_visible = boot_display_visible, +}; + +static const struct attribute_group *card_dev_groups[] = { + &display_attr_group, + NULL +}; + struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) { const char *minor_str; @@ -531,6 +571,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) kdev->devt = MKDEV(DRM_MAJOR, minor->index); kdev->class = drm_class; + kdev->groups = card_dev_groups; kdev->type = &drm_sysfs_device_minor; } diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 805aa28c1723..b8d9b7251319 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -69,7 +69,6 @@ struct decon_context { void __iomem *regs; unsigned long irq_flags; bool i80_if; - bool suspended; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; @@ -132,9 +131,6 @@ static void decon_shadow_protect_win(struct decon_context *ctx, static void decon_wait_for_vblank(struct decon_context *ctx) { - if (ctx->suspended) - return; - atomic_set(&ctx->wait_vsync_event, 1); /* @@ -210,9 +206,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc) struct drm_display_mode *mode = &crtc->base.state->adjusted_mode; u32 val, clkdiv; - if (ctx->suspended) - return; - /* nothing to do if we haven't set the mode yet */ if (mode->htotal == 0 || mode->vtotal == 0) return; @@ -274,9 +267,6 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; u32 val; - if (ctx->suspended) - return -EPERM; - if (!test_and_set_bit(0, &ctx->irq_flags)) { val = readl(ctx->regs + VIDINTCON0); 
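The multiply-by-inverse trick in div10() added to drm_panic_qr.rs above relies on 0x6666666666666667 being the ceiling of 2^66 / 10, so multiplying by it and taking the top bits approximates n / 10 without emitting a 64-bit divide. Below is a small, hypothetical user-space harness (plain C, not part of the patch) that mirrors the arm32 arithmetic and spot-checks it against ordinary division over a sample range:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the arm32 path of div10() from drm_panic_qr.rs above:
 * multiply by ~2^66/10 using 32-bit partial products, then shift. */
static uint64_t div10_approx(uint64_t val)
{
        uint64_t val_h = val >> 32;
        uint64_t val_l = val & 0xffffffff;
        uint64_t b_h = 0x66666666;
        uint64_t b_l = 0x66666667;

        uint64_t tmp1 = val_h * b_l + ((val_l * b_l) >> 32);
        uint64_t tmp2 = val_l * b_h + (tmp1 & 0xffffffff);
        uint64_t tmp3 = val_h * b_h + (tmp1 >> 32) + (tmp2 >> 32);

        return tmp3 >> 2;
}

int main(void)
{
        uint64_t v;

        /* Exhaustive check over a sample of small values; larger inputs
         * can be spot-checked the same way. */
        for (v = 0; v < 10000000; v++) {
                if (div10_approx(v) != v / 10) {
                        printf("mismatch at %llu\n", (unsigned long long)v);
                        return 1;
                }
        }
        printf("ok\n");
        return 0;
}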
@@ -299,9 +289,6 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; u32 val; - if (ctx->suspended) - return; - if (test_and_clear_bit(0, &ctx->irq_flags)) { val = readl(ctx->regs + VIDINTCON0); @@ -404,9 +391,6 @@ static void decon_atomic_begin(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, true); } @@ -427,9 +411,6 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, unsigned int pitch = fb->pitches[0]; unsigned int vidw_addr0_base = ctx->data->vidw_buf_start_base; - if (ctx->suspended) - return; - /* * SHADOWCON/PRTCON register is used for enabling timing. * @@ -517,9 +498,6 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, unsigned int win = plane->index; u32 val; - if (ctx->suspended) - return; - /* protect windows */ decon_shadow_protect_win(ctx, win, true); @@ -538,9 +516,6 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); exynos_crtc_handle_event(crtc); @@ -568,9 +543,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int ret; - if (!ctx->suspended) - return; - ret = pm_runtime_resume_and_get(ctx->dev); if (ret < 0) { DRM_DEV_ERROR(ctx->dev, "failed to enable DECON device.\n"); @@ -584,8 +556,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc) decon_enable_vblank(ctx->crtc); decon_commit(ctx->crtc); - - ctx->suspended = false; } static void decon_atomic_disable(struct exynos_drm_crtc *crtc) @@ -593,9 +563,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - /* * We need to make sure that all windows are disabled before we * suspend that connector. Otherwise we might try to scan from @@ -605,8 +572,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc) decon_disable_plane(crtc, &ctx->planes[i]); pm_runtime_put_sync(ctx->dev); - - ctx->suspended = true; } static const struct exynos_drm_crtc_ops decon_crtc_ops = { @@ -727,7 +692,6 @@ static int decon_probe(struct platform_device *pdev) return -ENOMEM; ctx->dev = dev; - ctx->suspended = true; ctx->data = of_device_get_match_data(dev); i80_if_timings = of_get_child_by_name(dev->of_node, "i80-if-timings"); diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 896a03639e2d..c4d098ab7863 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -154,6 +154,11 @@ static const struct samsung_dsim_plat_data exynos5433_dsi_pdata = { .host_ops = &exynos_dsi_exynos_host_ops, }; +static const struct samsung_dsim_plat_data exynos7870_dsi_pdata = { + .hw_type = DSIM_TYPE_EXYNOS7870, + .host_ops = &exynos_dsi_exynos_host_ops, +}; + static const struct of_device_id exynos_dsi_of_match[] = { { .compatible = "samsung,exynos3250-mipi-dsi", @@ -175,6 +180,10 @@ static const struct of_device_id exynos_dsi_of_match[] = { .compatible = "samsung,exynos5433-mipi-dsi", .data = &exynos5433_dsi_pdata, }, + { + .compatible = "samsung,exynos7870-mipi-dsi", + .data = &exynos7870_dsi_pdata, + }, { /* sentinel. 
*/ } }; MODULE_DEVICE_TABLE(of, exynos_dsi_of_match); diff --git a/drivers/gpu/drm/gma500/fbdev.c b/drivers/gpu/drm/gma500/fbdev.c index 4a37136f90f4..32d31e5f5f1a 100644 --- a/drivers/gpu/drm/gma500/fbdev.c +++ b/drivers/gpu/drm/gma500/fbdev.c @@ -120,7 +120,6 @@ static void psb_fbdev_fb_destroy(struct fb_info *info) drm_fb_helper_fini(fb_helper); drm_framebuffer_unregister_private(fb); - fb->obj[0] = NULL; drm_framebuffer_cleanup(fb); kfree(fb); @@ -245,7 +244,6 @@ int psb_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, err_drm_framebuffer_unregister_private: drm_framebuffer_unregister_private(fb); - fb->obj[0] = NULL; drm_framebuffer_cleanup(fb); kfree(fb); err_drm_gem_object_put: diff --git a/drivers/gpu/drm/gud/gud_connector.c b/drivers/gpu/drm/gud/gud_connector.c index 0f07d77c5d52..4a15695fa933 100644 --- a/drivers/gpu/drm/gud/gud_connector.c +++ b/drivers/gpu/drm/gud/gud_connector.c @@ -16,7 +16,6 @@ #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include <drm/drm_simple_kms_helper.h> #include <drm/gud.h> #include "gud_internal.h" @@ -607,13 +606,16 @@ int gud_connector_fill_properties(struct drm_connector_state *connector_state, return gconn->num_properties; } +static const struct drm_encoder_funcs gud_drm_simple_encoder_funcs_cleanup = { + .destroy = drm_encoder_cleanup, +}; + static int gud_connector_create(struct gud_device *gdrm, unsigned int index, struct gud_connector_descriptor_req *desc) { struct drm_device *drm = &gdrm->drm; struct gud_connector *gconn; struct drm_connector *connector; - struct drm_encoder *encoder; int ret, connector_type; u32 flags; @@ -681,20 +683,13 @@ static int gud_connector_create(struct gud_device *gdrm, unsigned int index, return ret; } - /* The first connector is attached to the existing simple pipe encoder */ - if (!connector->index) { - encoder = &gdrm->pipe.encoder; - } else { - encoder = &gconn->encoder; - - ret = drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_NONE); - if (ret) - return ret; - - encoder->possible_crtcs = 1; - } + gconn->encoder.possible_crtcs = drm_crtc_mask(&gdrm->crtc); + ret = drm_encoder_init(drm, &gconn->encoder, &gud_drm_simple_encoder_funcs_cleanup, + DRM_MODE_ENCODER_NONE, NULL); + if (ret) + return ret; - return drm_connector_attach_encoder(connector, encoder); + return drm_connector_attach_encoder(connector, &gconn->encoder); } int gud_get_connectors(struct gud_device *gdrm) diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c index b52a12cbba3e..b7345c8d823d 100644 --- a/drivers/gpu/drm/gud/gud_drv.c +++ b/drivers/gpu/drm/gud/gud_drv.c @@ -16,6 +16,7 @@ #include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> +#include <drm/drm_crtc_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> @@ -27,7 +28,6 @@ #include <drm/drm_managed.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include <drm/drm_simple_kms_helper.h> #include <drm/gud.h> #include "gud_internal.h" @@ -289,7 +289,7 @@ static int gud_get_properties(struct gud_device *gdrm) * but mask out any additions on future devices. 
*/ val &= GUD_ROTATION_MASK; - ret = drm_plane_create_rotation_property(&gdrm->pipe.plane, + ret = drm_plane_create_rotation_property(&gdrm->plane, DRM_MODE_ROTATE_0, val); break; default: @@ -338,10 +338,30 @@ static int gud_stats_debugfs(struct seq_file *m, void *data) return 0; } -static const struct drm_simple_display_pipe_funcs gud_pipe_funcs = { - .check = gud_pipe_check, - .update = gud_pipe_update, - DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS +static const struct drm_crtc_helper_funcs gud_crtc_helper_funcs = { + .atomic_check = drm_crtc_helper_atomic_check +}; + +static const struct drm_crtc_funcs gud_crtc_funcs = { + .reset = drm_atomic_helper_crtc_reset, + .destroy = drm_crtc_cleanup, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, +}; + +static const struct drm_plane_helper_funcs gud_plane_helper_funcs = { + DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, + .atomic_check = gud_plane_atomic_check, + .atomic_update = gud_plane_atomic_update, +}; + +static const struct drm_plane_funcs gud_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + DRM_GEM_SHADOW_PLANE_FUNCS, }; static const struct drm_mode_config_funcs gud_mode_config_funcs = { @@ -350,7 +370,7 @@ static const struct drm_mode_config_funcs gud_mode_config_funcs = { .atomic_commit = drm_atomic_helper_commit, }; -static const u64 gud_pipe_modifiers[] = { +static const u64 gud_plane_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; @@ -567,12 +587,17 @@ static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) return -ENOMEM; } - ret = drm_simple_display_pipe_init(drm, &gdrm->pipe, &gud_pipe_funcs, - formats, num_formats, - gud_pipe_modifiers, NULL); + ret = drm_universal_plane_init(drm, &gdrm->plane, 0, + &gud_plane_funcs, + formats, num_formats, + gud_plane_modifiers, + DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) return ret; + drm_plane_helper_add(&gdrm->plane, &gud_plane_helper_funcs); + drm_plane_enable_fb_damage_clips(&gdrm->plane); + devm_kfree(dev, formats); devm_kfree(dev, formats_dev); @@ -582,7 +607,12 @@ static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) return ret; } - drm_plane_enable_fb_damage_clips(&gdrm->pipe.plane); + ret = drm_crtc_init_with_planes(drm, &gdrm->crtc, &gdrm->plane, NULL, + &gud_crtc_funcs, NULL); + if (ret) + return ret; + + drm_crtc_helper_add(&gdrm->crtc, &gud_crtc_helper_funcs); ret = gud_get_connectors(gdrm); if (ret) { diff --git a/drivers/gpu/drm/gud/gud_internal.h b/drivers/gpu/drm/gud/gud_internal.h index d6fb25388722..d27c31648341 100644 --- a/drivers/gpu/drm/gud/gud_internal.h +++ b/drivers/gpu/drm/gud/gud_internal.h @@ -11,11 +11,11 @@ #include <uapi/drm/drm_fourcc.h> #include <drm/drm_modes.h> -#include <drm/drm_simple_kms_helper.h> struct gud_device { struct drm_device drm; - struct drm_simple_display_pipe pipe; + struct drm_plane plane; + struct drm_crtc crtc; struct work_struct work; u32 flags; const struct drm_format_info *xrgb8888_emulation_format; @@ -62,11 +62,10 @@ int gud_usb_set_u8(struct gud_device *gdrm, u8 request, u8 val); void gud_clear_damage(struct gud_device *gdrm); void gud_flush_work(struct work_struct *work); -int gud_pipe_check(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *new_plane_state, - struct drm_crtc_state 
*new_crtc_state); -void gud_pipe_update(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *old_state); +int gud_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state); +void gud_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *atomic_state); int gud_connector_fill_properties(struct drm_connector_state *connector_state, struct gud_property_req *properties); int gud_get_connectors(struct gud_device *gdrm); diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index 8d548d08f127..54d9aa9998e5 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c @@ -20,7 +20,6 @@ #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_print.h> #include <drm/drm_rect.h> -#include <drm/drm_simple_kms_helper.h> #include <drm/gud.h> #include "gud_internal.h" @@ -451,14 +450,15 @@ static void gud_fb_handle_damage(struct gud_device *gdrm, struct drm_framebuffer gud_flush_damage(gdrm, fb, src, !fb->obj[0]->import_attach, damage); } -int gud_pipe_check(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *new_plane_state, - struct drm_crtc_state *new_crtc_state) +int gud_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) { - struct gud_device *gdrm = to_gud_device(pipe->crtc.dev); - struct drm_plane_state *old_plane_state = pipe->plane.state; - const struct drm_display_mode *mode = &new_crtc_state->mode; - struct drm_atomic_state *state = new_plane_state->state; + struct gud_device *gdrm = to_gud_device(plane->dev); + struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane); + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_crtc *crtc = new_plane_state->crtc; + struct drm_crtc_state *crtc_state; + const struct drm_display_mode *mode; struct drm_framebuffer *old_fb = old_plane_state->fb; struct drm_connector_state *connector_state = NULL; struct drm_framebuffer *fb = new_plane_state->fb; @@ -469,20 +469,37 @@ int gud_pipe_check(struct drm_simple_display_pipe *pipe, int idx, ret; size_t len; - if (WARN_ON_ONCE(!fb)) + if (drm_WARN_ON_ONCE(plane->dev, !fb)) return -EINVAL; + if (drm_WARN_ON_ONCE(plane->dev, !crtc)) + return -EINVAL; + + crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + + mode = &crtc_state->mode; + + ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + false, false); + if (ret) + return ret; + + if (!new_plane_state->visible) + return 0; + if (old_plane_state->rotation != new_plane_state->rotation) - new_crtc_state->mode_changed = true; + crtc_state->mode_changed = true; if (old_fb && old_fb->format != format) - new_crtc_state->mode_changed = true; + crtc_state->mode_changed = true; - if (!new_crtc_state->mode_changed && !new_crtc_state->connectors_changed) + if (!crtc_state->mode_changed && !crtc_state->connectors_changed) return 0; /* Only one connector is supported */ - if (hweight32(new_crtc_state->connector_mask) != 1) + if (hweight32(crtc_state->connector_mask) != 1) return -EINVAL; if (format->format == DRM_FORMAT_XRGB8888 && gdrm->xrgb8888_emulation_format) @@ -500,7 +517,7 @@ int gud_pipe_check(struct drm_simple_display_pipe *pipe, if (!connector_state) { struct drm_connector_list_iter conn_iter; - drm_connector_list_iter_begin(pipe->crtc.dev, &conn_iter); + drm_connector_list_iter_begin(plane->dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if 
(connector->state->crtc) { connector_state = connector->state; @@ -567,16 +584,18 @@ out: return ret; } -void gud_pipe_update(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *old_state) +void gud_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *atomic_state) { - struct drm_device *drm = pipe->crtc.dev; + struct drm_device *drm = plane->dev; struct gud_device *gdrm = to_gud_device(drm); - struct drm_plane_state *state = pipe->plane.state; - struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state); - struct drm_framebuffer *fb = state->fb; - struct drm_crtc *crtc = &pipe->crtc; + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(atomic_state, plane); + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(atomic_state, plane); + struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(new_state); + struct drm_framebuffer *fb = new_state->fb; + struct drm_crtc *crtc = new_state->crtc; struct drm_rect damage; + struct drm_atomic_helper_damage_iter iter; int ret, idx; if (crtc->state->mode_changed || !crtc->state->enable) { @@ -611,7 +630,8 @@ void gud_pipe_update(struct drm_simple_display_pipe *pipe, if (ret) goto ctrl_disable; - if (drm_atomic_helper_damage_merged(old_state, state, &damage)) + drm_atomic_helper_damage_iter_init(&iter, old_state, new_state); + drm_atomic_for_each_plane_damage(&iter, &damage) gud_fb_handle_damage(gdrm, fb, &shadow_plane_state->data[0], &damage); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c index 74f7832ea53e..0726cb5b736e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -325,6 +325,17 @@ static int hibmc_dp_link_downgrade_training_eq(struct hibmc_dp_dev *dp) return hibmc_dp_link_reduce_rate(dp); } +static void hibmc_dp_update_caps(struct hibmc_dp_dev *dp) +{ + dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; + if (dp->link.cap.link_rate > DP_LINK_BW_8_1 || !dp->link.cap.link_rate) + dp->link.cap.link_rate = DP_LINK_BW_8_1; + + dp->link.cap.lanes = dp->dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK; + if (dp->link.cap.lanes > HIBMC_DP_LANE_NUM_MAX) + dp->link.cap.lanes = HIBMC_DP_LANE_NUM_MAX; +} + int hibmc_dp_link_training(struct hibmc_dp_dev *dp) { struct hibmc_dp_link *link = &dp->link; @@ -334,8 +345,7 @@ int hibmc_dp_link_training(struct hibmc_dp_dev *dp) if (ret) drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); - dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; - dp->link.cap.lanes = 0x2; + hibmc_dp_update_caps(dp); ret = hibmc_dp_get_serdes_rate_cfg(dp); if (ret < 0) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 768b97f9e74a..289304500ab0 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -32,7 +32,7 @@ DEFINE_DRM_GEM_FOPS(hibmc_fops); -static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "vblank", "hpd" }; +static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "hibmc-vblank", "hibmc-hpd" }; static irqreturn_t hibmc_interrupt(int irq, void *arg) { @@ -115,6 +115,8 @@ static const struct drm_mode_config_funcs hibmc_mode_funcs = { static int hibmc_kms_init(struct hibmc_drm_private *priv) { struct drm_device *dev = &priv->dev; + struct drm_encoder *encoder; + u32 clone_mask = 0; int ret; ret = 
drmm_mode_config_init(dev); @@ -154,6 +156,12 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) return ret; } + drm_for_each_encoder(encoder, dev) + clone_mask |= drm_encoder_mask(encoder); + + drm_for_each_encoder(encoder, dev) + encoder->possible_clones = clone_mask; + return 0; } @@ -277,7 +285,6 @@ static void hibmc_unload(struct drm_device *dev) static int hibmc_msi_init(struct drm_device *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); - char name[32] = {0}; int valid_irq_num; int irq; int ret; @@ -292,9 +299,6 @@ static int hibmc_msi_init(struct drm_device *dev) valid_irq_num = ret; for (int i = 0; i < valid_irq_num; i++) { - snprintf(name, ARRAY_SIZE(name) - 1, "%s-%s-%s", - dev->driver->name, pci_name(pdev), g_irqs_names_map[i]); - irq = pci_irq_vector(pdev, i); if (i) @@ -302,10 +306,10 @@ static int hibmc_msi_init(struct drm_device *dev) ret = devm_request_threaded_irq(&pdev->dev, irq, hibmc_dp_interrupt, hibmc_dp_hpd_isr, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); else ret = devm_request_irq(&pdev->dev, irq, hibmc_interrupt, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); if (ret) { drm_err(dev, "install irq failed: %d\n", ret); return ret; @@ -323,13 +327,13 @@ static int hibmc_load(struct drm_device *dev) ret = hibmc_hw_init(priv); if (ret) - goto err; + return ret; ret = drmm_vram_helper_init(dev, pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if (ret) { drm_err(dev, "Error initializing VRAM MM; %d\n", ret); - goto err; + return ret; } ret = hibmc_kms_init(priv); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 274feabe7df0..ca8502e2760c 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -69,6 +69,7 @@ int hibmc_de_init(struct hibmc_drm_private *priv); int hibmc_vdac_init(struct hibmc_drm_private *priv); int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); +void hibmc_ddc_del(struct hibmc_vdac *vdac); int hibmc_dp_init(struct hibmc_drm_private *priv); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index 99b3b77b5445..44860011855e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -95,3 +95,8 @@ int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *vdac) return i2c_bit_add_bus(&vdac->adapter); } + +void hibmc_ddc_del(struct hibmc_vdac *vdac) +{ + i2c_del_adapter(&vdac->adapter); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index e8a527ede854..841e81f47b68 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -53,7 +53,7 @@ static void hibmc_connector_destroy(struct drm_connector *connector) { struct hibmc_vdac *vdac = to_hibmc_vdac(connector); - i2c_del_adapter(&vdac->adapter); + hibmc_ddc_del(vdac); drm_connector_cleanup(connector); } @@ -110,7 +110,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) ret = drmm_encoder_init(dev, encoder, NULL, DRM_MODE_ENCODER_DAC, NULL); if (ret) { drm_err(dev, "failed to init encoder: %d\n", ret); - return ret; + goto err; } drm_encoder_helper_add(encoder, &hibmc_encoder_helper_funcs); @@ -121,7 +121,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) &vdac->adapter); if (ret) { drm_err(dev, "failed to init connector: 
%d\n", ret); - return ret; + goto err; } drm_connector_helper_add(connector, &hibmc_connector_helper_funcs); @@ -131,4 +131,9 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; return 0; + +err: + hibmc_ddc_del(vdac); + + return ret; } diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 1852e0804942..3562a02ef7ad 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -50,7 +50,7 @@ config DRM_I915_DEBUG select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y select DRM_EXPORT_FOR_TESTS if m - select DRM_DEBUG_SELFTEST + select DRM_KUNIT_TEST if KUNIT select DMABUF_SELFTESTS select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_WERROR diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index 87f6b9602b16..aa159f9ce12f 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -424,17 +424,6 @@ intel_dp_link_down(struct intel_encoder *encoder, drm_dbg_kms(display->drm, "\n"); - if ((display->platform.ivybridge && port == PORT_A) || - (HAS_PCH_CPT(display) && port != PORT_A)) { - intel_dp->DP &= ~DP_LINK_TRAIN_MASK_CPT; - intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; - } else { - intel_dp->DP &= ~DP_LINK_TRAIN_MASK; - intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE; - } - intel_de_write(display, intel_dp->output_reg, intel_dp->DP); - intel_de_posting_read(display, intel_dp->output_reg); - intel_dp->DP &= ~DP_PORT_EN; intel_de_write(display, intel_dp->output_reg, intel_dp->DP); intel_de_posting_read(display, intel_dp->output_reg); @@ -612,6 +601,19 @@ cpt_set_link_train(struct intel_dp *intel_dp, } static void +cpt_set_idle_link_train(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(intel_dp); + + intel_dp->DP &= ~DP_LINK_TRAIN_MASK_CPT; + intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; + + intel_de_write(display, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(display, intel_dp->output_reg); +} + +static void g4x_set_link_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 dp_train_pat) @@ -639,6 +641,19 @@ g4x_set_link_train(struct intel_dp *intel_dp, intel_de_posting_read(display, intel_dp->output_reg); } +static void +g4x_set_idle_link_train(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(intel_dp); + + intel_dp->DP &= ~DP_LINK_TRAIN_MASK; + intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE; + + intel_de_write(display, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(display, intel_dp->output_reg); +} + static void intel_dp_enable_port(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -1285,12 +1300,10 @@ bool g4x_dp_init(struct intel_display *display, drm_dbg_kms(display->drm, "No VBT child device for DP-%c\n", port_name(port)); - dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); + dig_port = intel_dig_port_alloc(); if (!dig_port) return false; - dig_port->aux_ch = AUX_CH_NONE; - intel_connector = intel_connector_alloc(); if (!intel_connector) goto err_connector_alloc; @@ -1300,8 +1313,6 @@ bool g4x_dp_init(struct intel_display *display, intel_encoder->devdata = devdata; - mutex_init(&dig_port->hdcp.mutex); - if (drm_encoder_init(display->drm, &intel_encoder->base, &intel_dp_enc_funcs, 
DRM_MODE_ENCODER_TMDS, "DP %c", port_name(port))) @@ -1342,10 +1353,13 @@ bool g4x_dp_init(struct intel_display *display, intel_encoder->audio_disable = g4x_dp_audio_disable; if ((display->platform.ivybridge && port == PORT_A) || - (HAS_PCH_CPT(display) && port != PORT_A)) + (HAS_PCH_CPT(display) && port != PORT_A)) { dig_port->dp.set_link_train = cpt_set_link_train; - else + dig_port->dp.set_idle_link_train = cpt_set_idle_link_train; + } else { dig_port->dp.set_link_train = g4x_set_link_train; + dig_port->dp.set_idle_link_train = g4x_set_idle_link_train; + } if (display->platform.cherryview) intel_encoder->set_signal_levels = chv_set_signal_levels; @@ -1368,7 +1382,6 @@ bool g4x_dp_init(struct intel_display *display, } dig_port->dp.output_reg = output_reg; - dig_port->max_lanes = 4; intel_encoder->type = INTEL_OUTPUT_DP; intel_encoder->power_domain = intel_display_power_ddi_lanes_domain(display, port); diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 2610f5702fb9..f6e2d1ed5639 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -19,7 +19,7 @@ #include "intel_display_types.h" #include "intel_dp_aux.h" #include "intel_dpio_phy.h" -#include "intel_fdi.h" +#include "intel_encoder.h" #include "intel_fifo_underrun.h" #include "intel_hdmi.h" #include "intel_hotplug.h" @@ -135,11 +135,8 @@ static int g4x_hdmi_compute_config(struct intel_encoder *encoder, struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - if (HAS_PCH_SPLIT(display)) { + if (HAS_PCH_SPLIT(display)) crtc_state->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(crtc_state)) - return -EINVAL; - } if (display->platform.g4x) crtc_state->has_hdmi_sink = g4x_compute_has_hdmi_sink(state, crtc); @@ -690,12 +687,10 @@ bool g4x_hdmi_init(struct intel_display *display, drm_dbg_kms(display->drm, "No VBT child device for HDMI-%c\n", port_name(port)); - dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); + dig_port = intel_dig_port_alloc(); if (!dig_port) return false; - dig_port->aux_ch = AUX_CH_NONE; - intel_connector = intel_connector_alloc(); if (!intel_connector) goto err_connector_alloc; @@ -704,8 +699,6 @@ bool g4x_hdmi_init(struct intel_display *display, intel_encoder->devdata = devdata; - mutex_init(&dig_port->hdcp.mutex); - if (drm_encoder_init(display->drm, &intel_encoder->base, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, "HDMI %c", port_name(port))) @@ -767,8 +760,6 @@ bool g4x_hdmi_init(struct intel_display *display, intel_encoder->cloneable |= BIT(INTEL_OUTPUT_HDMI); dig_port->hdmi.hdmi_reg = hdmi_reg; - dig_port->dp.output_reg = INVALID_MMIO_REG; - dig_port->max_lanes = 4; intel_infoframe_init(dig_port); diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index f291ced989dc..3eb96d8abba8 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -155,8 +155,7 @@ static bool i9xx_plane_has_windowing(struct intel_plane *plane) i9xx_plane == PLANE_C; } -static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i9xx_plane_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -355,11 +354,24 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state, if (ret) 
return ret; - plane_state->ctl = i9xx_plane_ctl(crtc_state, plane_state); + plane_state->ctl = i9xx_plane_ctl(plane_state); return 0; } +static u32 i8xx_plane_surf_offset(const struct intel_plane_state *plane_state) +{ + int x = plane_state->view.color_plane[0].x; + int y = plane_state->view.color_plane[0].y; + + return intel_fb_xy_to_linear(x, y, plane_state, 0); +} + +u32 i965_plane_surf_offset(const struct intel_plane_state *plane_state) +{ + return plane_state->view.color_plane[0].offset; +} + static u32 i9xx_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -463,7 +475,7 @@ static void i9xx_plane_update_arm(struct intel_dsb *dsb, enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; int x = plane_state->view.color_plane[0].x; int y = plane_state->view.color_plane[0].y; - u32 dspcntr, dspaddr_offset, linear_offset; + u32 dspcntr; dspcntr = plane_state->ctl | i9xx_plane_ctl_crtc(crtc_state); @@ -472,13 +484,6 @@ static void i9xx_plane_update_arm(struct intel_dsb *dsb, crtc_state->async_flip_planes & BIT(plane->id)) dspcntr |= DISP_ASYNC_FLIP; - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - - if (DISPLAY_VER(display) >= 4) - dspaddr_offset = plane_state->view.color_plane[0].offset; - else - dspaddr_offset = linear_offset; - if (display->platform.cherryview && i9xx_plane == PLANE_B) { int crtc_x = plane_state->uapi.dst.x1; int crtc_y = plane_state->uapi.dst.y1; @@ -498,7 +503,7 @@ static void i9xx_plane_update_arm(struct intel_dsb *dsb, DISP_OFFSET_Y(y) | DISP_OFFSET_X(x)); } else if (DISPLAY_VER(display) >= 4) { intel_de_write_fw(display, DSPLINOFF(display, i9xx_plane), - linear_offset); + intel_fb_xy_to_linear(x, y, plane_state, 0)); intel_de_write_fw(display, DSPTILEOFF(display, i9xx_plane), DISP_OFFSET_Y(y) | DISP_OFFSET_X(x)); } @@ -511,11 +516,9 @@ static void i9xx_plane_update_arm(struct intel_dsb *dsb, intel_de_write_fw(display, DSPCNTR(display, i9xx_plane), dspcntr); if (DISPLAY_VER(display) >= 4) - intel_de_write_fw(display, DSPSURF(display, i9xx_plane), - intel_plane_ggtt_offset(plane_state) + dspaddr_offset); + intel_de_write_fw(display, DSPSURF(display, i9xx_plane), plane_state->surf); else - intel_de_write_fw(display, DSPADDR(display, i9xx_plane), - intel_plane_ggtt_offset(plane_state) + dspaddr_offset); + intel_de_write_fw(display, DSPADDR(display, i9xx_plane), plane_state->surf); } static void i830_plane_update_arm(struct intel_dsb *dsb, @@ -604,16 +607,13 @@ g4x_primary_async_flip(struct intel_dsb *dsb, { struct intel_display *display = to_intel_display(plane); u32 dspcntr = plane_state->ctl | i9xx_plane_ctl_crtc(crtc_state); - u32 dspaddr_offset = plane_state->view.color_plane[0].offset; enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; if (async_flip) dspcntr |= DISP_ASYNC_FLIP; intel_de_write_fw(display, DSPCNTR(display, i9xx_plane), dspcntr); - - intel_de_write_fw(display, DSPSURF(display, i9xx_plane), - intel_plane_ggtt_offset(plane_state) + dspaddr_offset); + intel_de_write_fw(display, DSPSURF(display, i9xx_plane), plane_state->surf); } static void @@ -624,11 +624,9 @@ vlv_primary_async_flip(struct intel_dsb *dsb, bool async_flip) { struct intel_display *display = to_intel_display(plane); - u32 dspaddr_offset = plane_state->view.color_plane[0].offset; enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; - intel_de_write_fw(display, DSPADDR_VLV(display, i9xx_plane), - intel_plane_ggtt_offset(plane_state) + dspaddr_offset); + intel_de_write_fw(display, DSPADDR_VLV(display, 
i9xx_plane), plane_state->surf); } static void @@ -1037,6 +1035,11 @@ intel_primary_plane_create(struct intel_display *display, enum pipe pipe) plane->get_hw_state = i9xx_plane_get_hw_state; plane->check_plane = i9xx_plane_check; + if (DISPLAY_VER(display) >= 4) + plane->surf_offset = i965_plane_surf_offset; + else + plane->surf_offset = i8xx_plane_surf_offset; + if (DISPLAY_VER(display) >= 5 || display->platform.g4x) plane->capture_error = g4x_primary_capture_error; else if (DISPLAY_VER(display) >= 4) @@ -1254,24 +1257,21 @@ bool i9xx_fixup_initial_plane_config(struct intel_crtc *crtc, const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; - u32 base; if (!plane_state->uapi.visible) return false; - base = intel_plane_ggtt_offset(plane_state); - /* * We may have moved the surface to a different * part of ggtt, make the plane aware of that. */ - if (plane_config->base == base) + if (plane_config->base == plane_state->surf) return false; if (DISPLAY_VER(display) >= 4) - intel_de_write(display, DSPSURF(display, i9xx_plane), base); + intel_de_write(display, DSPSURF(display, i9xx_plane), plane_state->surf); else - intel_de_write(display, DSPADDR(display, i9xx_plane), base); + intel_de_write(display, DSPADDR(display, i9xx_plane), plane_state->surf); return true; } diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.h b/drivers/gpu/drm/i915/display/i9xx_plane.h index d90546d60855..565dab751301 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.h +++ b/drivers/gpu/drm/i915/display/i9xx_plane.h @@ -24,6 +24,7 @@ unsigned int vlv_plane_min_alignment(struct intel_plane *plane, const struct drm_framebuffer *fb, int colot_plane); int i9xx_check_plane_surface(struct intel_plane_state *plane_state); +u32 i965_plane_surf_offset(const struct intel_plane_state *plane_state); struct intel_plane * intel_primary_plane_create(struct intel_display *display, enum pipe pipe); diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index 1f9db5118777..fd3b7b35f351 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -3,6 +3,10 @@ * Copyright © 2023 Intel Corporation */ +#include <linux/iopoll.h> + +#include "soc/intel_dram.h" + #include "i915_drv.h" #include "i915_reg.h" #include "i9xx_wm.h" @@ -85,7 +89,8 @@ static const struct cxsr_latency cxsr_latency_table[] = { static const struct cxsr_latency *pnv_get_cxsr_latency(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); + const struct dram_info *dram_info = intel_dram_info(display->drm); + bool is_ddr3 = dram_info->type == INTEL_DRAM_DDR3; int i; for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) { @@ -93,15 +98,16 @@ static const struct cxsr_latency *pnv_get_cxsr_latency(struct intel_display *dis bool is_desktop = !display->platform.mobile; if (is_desktop == latency->is_desktop && - i915->is_ddr3 == latency->is_ddr3 && - DIV_ROUND_CLOSEST(i915->fsb_freq, 1000) == latency->fsb_freq && - DIV_ROUND_CLOSEST(i915->mem_freq, 1000) == latency->mem_freq) + is_ddr3 == latency->is_ddr3 && + DIV_ROUND_CLOSEST(dram_info->fsb_freq, 1000) == latency->fsb_freq && + DIV_ROUND_CLOSEST(dram_info->mem_freq, 1000) == latency->mem_freq) return latency; } drm_dbg_kms(display->drm, - "Could not find CxSR latency for DDR%s, FSB %u kHz, MEM %u kHz\n", - i915->is_ddr3 ? 
"3" : "2", i915->fsb_freq, i915->mem_freq); + "Could not find CxSR latency for %s, FSB %u kHz, MEM %u kHz\n", + intel_dram_type_str(dram_info->type), + dram_info->fsb_freq, dram_info->mem_freq); return NULL; } @@ -109,6 +115,7 @@ static const struct cxsr_latency *pnv_get_cxsr_latency(struct intel_display *dis static void chv_set_memory_dvfs(struct intel_display *display, bool enable) { u32 val; + int ret; vlv_punit_get(display->drm); @@ -121,8 +128,10 @@ static void chv_set_memory_dvfs(struct intel_display *display, bool enable) val |= FORCE_DDR_FREQ_REQ_ACK; vlv_punit_write(display->drm, PUNIT_REG_DDR_SETUP2, val); - if (wait_for((vlv_punit_read(display->drm, PUNIT_REG_DDR_SETUP2) & - FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_DDR_SETUP2), + (val & FORCE_DDR_FREQ_REQ_ACK) == 0, + 500, 3000, false); + if (ret) drm_err(display->drm, "timed out waiting for Punit DDR DVFS request\n"); @@ -3902,6 +3911,7 @@ static void vlv_wm_get_hw_state(struct intel_display *display) struct vlv_wm_values *wm = &display->wm.vlv; struct intel_crtc *crtc; u32 val; + int ret; vlv_read_wm_values(display, wm); @@ -3928,8 +3938,10 @@ static void vlv_wm_get_hw_state(struct intel_display *display) val |= FORCE_DDR_FREQ_REQ_ACK; vlv_punit_write(display->drm, PUNIT_REG_DDR_SETUP2, val); - if (wait_for((vlv_punit_read(display->drm, PUNIT_REG_DDR_SETUP2) & - FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_DDR_SETUP2), + (val & FORCE_DDR_FREQ_REQ_ACK) == 0, + 500, 3000, false); + if (ret) { drm_dbg_kms(display->drm, "Punit not acking DDR DVFS request, " "assuming DDR DVFS is disabled\n"); diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 8d9cb73a93a7..37faa8f19f6e 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -25,6 +25,8 @@ * Jani Nikula <jani.nikula@intel.com> */ +#include <linux/iopoll.h> + #include <drm/display/drm_dsc_helper.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_fixed.h> @@ -72,8 +74,12 @@ static int payload_credits_available(struct intel_display *display, static bool wait_for_header_credits(struct intel_display *display, enum transcoder dsi_trans, int hdr_credit) { - if (wait_for_us(header_credits_available(display, dsi_trans) >= - hdr_credit, 100)) { + int ret, available; + + ret = poll_timeout_us(available = header_credits_available(display, dsi_trans), + available >= hdr_credit, + 10, 100, false); + if (ret) { drm_err(display->drm, "DSI header credits not released\n"); return false; } @@ -84,8 +90,12 @@ static bool wait_for_header_credits(struct intel_display *display, static bool wait_for_payload_credits(struct intel_display *display, enum transcoder dsi_trans, int payld_credit) { - if (wait_for_us(payload_credits_available(display, dsi_trans) >= - payld_credit, 100)) { + int ret, available; + + ret = poll_timeout_us(available = payload_credits_available(display, dsi_trans), + available >= payld_credit, + 10, 100, false); + if (ret) { drm_err(display->drm, "DSI payload credits not released\n"); return false; } @@ -137,8 +147,11 @@ static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder) /* wait for LP TX in progress bit to be cleared */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - if (wait_for_us(!(intel_de_read(display, DSI_LP_MSG(dsi_trans)) & - LPTX_IN_PROGRESS), 20)) + + ret = intel_de_wait_custom(display, 
DSI_LP_MSG(dsi_trans), + LPTX_IN_PROGRESS, 0, + 20, 0, NULL); + if (ret) drm_err(display->drm, "LPTX bit not cleared\n"); } } @@ -516,13 +529,15 @@ static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder) struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; + int ret; for_each_dsi_port(port, intel_dsi->ports) { intel_de_rmw(display, DDI_BUF_CTL(port), 0, DDI_BUF_CTL_ENABLE); - if (wait_for_us(!(intel_de_read(display, DDI_BUF_CTL(port)) & - DDI_BUF_IS_IDLE), - 500)) + ret = intel_de_wait_custom(display, DDI_BUF_CTL(port), + DDI_BUF_IS_IDLE, 0, + 500, 0, NULL); + if (ret) drm_err(display->drm, "DDI port:%c buffer idle\n", port_name(port)); } @@ -838,9 +853,14 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, /* wait for link ready */ for_each_dsi_port(port, intel_dsi->ports) { + int ret; + dsi_trans = dsi_port_to_transcoder(port); - if (wait_for_us((intel_de_read(display, DSI_TRANS_FUNC_CONF(dsi_trans)) & - LINK_READY), 2500)) + + ret = intel_de_wait_custom(display, DSI_TRANS_FUNC_CONF(dsi_trans), + LINK_READY, LINK_READY, + 2500, 0, NULL); + if (ret) drm_err(display->drm, "DSI link not ready\n"); } } @@ -1321,6 +1341,7 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) enum port port; enum transcoder dsi_trans; u32 tmp; + int ret; /* disable periodic update mode */ if (is_cmd_mode(intel_dsi)) { @@ -1337,9 +1358,10 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) tmp &= ~LINK_ULPS_TYPE_LP11; intel_de_write(display, DSI_LP_MSG(dsi_trans), tmp); - if (wait_for_us((intel_de_read(display, DSI_LP_MSG(dsi_trans)) & - LINK_IN_ULPS), - 10)) + ret = intel_de_wait_custom(display, DSI_LP_MSG(dsi_trans), + LINK_IN_ULPS, LINK_IN_ULPS, + 10, 0, NULL); + if (ret) drm_err(display->drm, "DSI link not in ULPS\n"); } @@ -1367,14 +1389,17 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder) struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; + int ret; gen11_dsi_ungate_clocks(encoder); for_each_dsi_port(port, intel_dsi->ports) { intel_de_rmw(display, DDI_BUF_CTL(port), DDI_BUF_CTL_ENABLE, 0); - if (wait_for_us((intel_de_read(display, DDI_BUF_CTL(port)) & - DDI_BUF_IS_IDLE), - 8)) + ret = intel_de_wait_custom(display, DDI_BUF_CTL(port), + DDI_BUF_IS_IDLE, DDI_BUF_IS_IDLE, + 8, 0, NULL); + + if (ret) drm_err(display->drm, "DDI port:%c buffer not idle\n", port_name(port)); diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index dfdde8e4eabe..ed7a7ed486b5 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -16,6 +16,14 @@ #include "intel_psr.h" #include "intel_psr_regs.h" +#define SILENCE_PERIOD_MIN_TIME 80 +#define SILENCE_PERIOD_MAX_TIME 180 +#define SILENCE_PERIOD_TIME (SILENCE_PERIOD_MIN_TIME + \ + (SILENCE_PERIOD_MAX_TIME - \ + SILENCE_PERIOD_MIN_TIME) / 2) + +#define LFPS_CYCLE_COUNT 10 + bool intel_alpm_aux_wake_supported(struct intel_dp *intel_dp) { return intel_dp->alpm_dpcd & DP_ALPM_CAP; @@ -44,72 +52,49 @@ void intel_alpm_init(struct intel_dp *intel_dp) mutex_init(&intel_dp->alpm_parameters.lock); } -/* - * See Bspec: 71632 for the table - * - * Silence_period = tSilence,Min + ((tSilence,Max - tSilence,Min) / 2) - * - * Half cycle duration: - * - * Link rates 1.62 - 4.32 and tLFPS_Cycle = 70 ns - * FLOOR( (Link Rate * tLFPS_Cycle) / (2 * 10) ) - * 
- * Link rates 5.4 - 8.1 - * PORT_ALPM_LFPS_CTL[ LFPS Cycle Count ] = 10 - * LFPS Period chosen is the mid-point of the min:max values from the table - * FLOOR( LFPS Period in Symbol clocks / - * (2 * PORT_ALPM_LFPS_CTL[ LFPS Cycle Count ]) ) - */ -static bool _lnl_get_silence_period_and_lfps_half_cycle(int link_rate, - int *silence_period, - int *lfps_half_cycle) +static int get_silence_period_symbols(const struct intel_crtc_state *crtc_state) { - switch (link_rate) { - case 162000: - *silence_period = 20; - *lfps_half_cycle = 5; - break; - case 216000: - *silence_period = 27; - *lfps_half_cycle = 7; - break; - case 243000: - *silence_period = 31; - *lfps_half_cycle = 8; - break; - case 270000: - *silence_period = 34; - *lfps_half_cycle = 9; - break; - case 324000: - *silence_period = 41; - *lfps_half_cycle = 11; - break; - case 432000: - *silence_period = 56; - *lfps_half_cycle = 15; - break; - case 540000: - *silence_period = 69; - *lfps_half_cycle = 12; - break; - case 648000: - *silence_period = 84; - *lfps_half_cycle = 15; - break; - case 675000: - *silence_period = 87; - *lfps_half_cycle = 15; - break; - case 810000: - *silence_period = 104; - *lfps_half_cycle = 19; - break; - default: - *silence_period = *lfps_half_cycle = -1; - return false; + return SILENCE_PERIOD_TIME * intel_dp_link_symbol_clock(crtc_state->port_clock) / + 1000 / 1000; +} + +static int get_lfps_cycle_min_max_time(const struct intel_crtc_state *crtc_state, + int *min, int *max) +{ + if (crtc_state->port_clock < 540000) { + *min = 65 * LFPS_CYCLE_COUNT; + *max = 75 * LFPS_CYCLE_COUNT; + } else if (crtc_state->port_clock <= 810000) { + *min = 140; + *max = 800; + } else { + *min = *max = -1; + return -1; } - return true; + + return 0; +} + +static int get_lfps_cycle_time(const struct intel_crtc_state *crtc_state) +{ + int tlfps_cycle_min, tlfps_cycle_max, ret; + + ret = get_lfps_cycle_min_max_time(crtc_state, &tlfps_cycle_min, + &tlfps_cycle_max); + if (ret) + return ret; + + return tlfps_cycle_min + (tlfps_cycle_max - tlfps_cycle_min) / 2; +} + +static int get_lfps_half_cycle_clocks(const struct intel_crtc_state *crtc_state) +{ + int lfps_cycle_time = get_lfps_cycle_time(crtc_state); + + if (lfps_cycle_time < 0) + return -1; + + return lfps_cycle_time * crtc_state->port_clock / 1000 / 1000 / (2 * LFPS_CYCLE_COUNT); } /* @@ -131,21 +116,19 @@ static bool _lnl_get_silence_period_and_lfps_half_cycle(int link_rate, * tML_PHY_LOCK = TPS4 Length * ( 10 / (Link Rate in MHz) ) * TPS4 Length = 252 Symbols */ -static int _lnl_compute_aux_less_wake_time(int port_clock) +static int _lnl_compute_aux_less_wake_time(const struct intel_crtc_state *crtc_state) { int tphy2_p2_to_p0 = 12 * 1000; - int tlfps_period_max = 800; - int tsilence_max = 180; int t1 = 50 * 1000; int tps4 = 252; /* port_clock is link rate in 10kbit/s units */ - int tml_phy_lock = 1000 * 1000 * tps4 / port_clock; + int tml_phy_lock = 1000 * 1000 * tps4 / crtc_state->port_clock; int num_ml_phy_lock = 7 + DIV_ROUND_UP(6500, tml_phy_lock) + 1; int t2 = num_ml_phy_lock * tml_phy_lock; int tcds = 1 * t2; - return DIV_ROUND_UP(tphy2_p2_to_p0 + tlfps_period_max + tsilence_max + - t1 + tcds, 1000); + return DIV_ROUND_UP(tphy2_p2_to_p0 + get_lfps_cycle_time(crtc_state) + + SILENCE_PERIOD_TIME + t1 + tcds, 1000); } static int @@ -157,13 +140,13 @@ _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, lfps_half_cycle; aux_less_wake_time = - _lnl_compute_aux_less_wake_time(crtc_state->port_clock); + _lnl_compute_aux_less_wake_time(crtc_state); aux_less_wake_lines 
= intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, aux_less_wake_time); + silence_period = get_silence_period_symbols(crtc_state); - if (!_lnl_get_silence_period_and_lfps_half_cycle(crtc_state->port_clock, - &silence_period, - &lfps_half_cycle)) + lfps_half_cycle = get_lfps_half_cycle_clocks(crtc_state); + if (lfps_half_cycle < 0) return false; if (aux_less_wake_lines > ALPM_CTL_AUX_LESS_WAKE_TIME_MASK || @@ -406,7 +389,7 @@ void intel_alpm_port_configure(struct intel_dp *intel_dp, PORT_ALPM_CTL_MAX_PHY_SWING_HOLD(0) | PORT_ALPM_CTL_SILENCE_PERIOD( intel_dp->alpm_parameters.silence_period_sym_clocks); - lfps_ctl_val = PORT_ALPM_LFPS_CTL_LFPS_CYCLE_COUNT(10) | + lfps_ctl_val = PORT_ALPM_LFPS_CTL_LFPS_CYCLE_COUNT(LFPS_CYCLE_COUNT) | PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION( intel_dp->alpm_parameters.lfps_half_cycle_num_of_syms) | PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION( diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index e007380e9a63..3b14f929825a 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -236,7 +236,8 @@ static void i9xx_set_backlight(const struct drm_connector_state *conn_state, u32 struct intel_panel *panel = &connector->panel; u32 tmp, mask; - drm_WARN_ON(display->drm, panel->backlight.pwm_level_max == 0); + if (drm_WARN_ON(display->drm, panel->backlight.pwm_level_max == 0)) + return; if (panel->backlight.combination_mode) { struct pci_dev *pdev = to_pci_dev(display->drm->dev); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 9c268bed091d..3596dce84c28 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -36,6 +36,7 @@ #include "soc/intel_rom.h" #include "i915_drv.h" +#include "i915_utils.h" #include "intel_display.h" #include "intel_display_core.h" #include "intel_display_rpm.h" @@ -1566,10 +1567,7 @@ parse_psr(struct intel_display *display, panel->vbt.psr.full_link = psr_table->full_link; panel->vbt.psr.require_aux_wakeup = psr_table->require_aux_to_wakeup; - - /* Allowed VBT values goes from 0 to 15 */ - panel->vbt.psr.idle_frames = psr_table->idle_frames < 0 ? 0 : - psr_table->idle_frames > 15 ? 
15 : psr_table->idle_frames; + panel->vbt.psr.idle_frames = psr_table->idle_frames; /* * New psr options 0=500us, 1=100us, 2=2500us, 3=0us @@ -2480,6 +2478,25 @@ static int parse_bdb_216_dp_max_link_rate(const int vbt_max_link_rate) } } +static u32 edp_rate_override_mask(int rate) +{ + switch (rate) { + case 2000000: return BDB_263_VBT_EDP_LINK_RATE_20; + case 1350000: return BDB_263_VBT_EDP_LINK_RATE_13_5; + case 1000000: return BDB_263_VBT_EDP_LINK_RATE_10; + case 810000: return BDB_263_VBT_EDP_LINK_RATE_8_1; + case 675000: return BDB_263_VBT_EDP_LINK_RATE_6_75; + case 540000: return BDB_263_VBT_EDP_LINK_RATE_5_4; + case 432000: return BDB_263_VBT_EDP_LINK_RATE_4_32; + case 324000: return BDB_263_VBT_EDP_LINK_RATE_3_24; + case 270000: return BDB_263_VBT_EDP_LINK_RATE_2_7; + case 243000: return BDB_263_VBT_EDP_LINK_RATE_2_43; + case 216000: return BDB_263_VBT_EDP_LINK_RATE_2_16; + case 162000: return BDB_263_VBT_EDP_LINK_RATE_1_62; + default: return 0; + } +} + int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata) { if (!devdata || devdata->display->vbt.version < 216) @@ -2499,6 +2516,19 @@ int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata) return devdata->child.dp_max_lane_count + 1; } +bool +intel_bios_encoder_reject_edp_rate(const struct intel_bios_encoder_data *devdata, + int rate) +{ + if (!devdata || devdata->display->vbt.version < 263) + return false; + + if (devdata->child.edp_data_rate_override == BDB_263_VBT_EDP_RATES_MASK) + return false; + + return devdata->child.edp_data_rate_override & edp_rate_override_mask(rate); +} + static void sanitize_device_type(struct intel_bios_encoder_data *devdata, enum port port) { @@ -2747,8 +2777,10 @@ static int child_device_expected_size(u16 version) { BUILD_BUG_ON(sizeof(struct child_device_config) < 40); - if (version > 256) + if (version > 263) return -ENOENT; + else if (version >= 263) + return 44; else if (version >= 256) return 40; else if (version >= 216) @@ -3743,8 +3775,6 @@ DEFINE_SHOW_ATTRIBUTE(intel_bios_vbt); void intel_bios_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; - - debugfs_create_file("i915_vbt", 0444, minor->debugfs_root, + debugfs_create_file("i915_vbt", 0444, display->drm->debugfs_root, display, &intel_bios_vbt_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 6cd7a011b8c4..f9e438b2787b 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -50,180 +50,6 @@ enum intel_backlight_type { INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE, }; -/* - * MIPI Sequence Block definitions - * - * Note the VBT spec has AssertReset / DeassertReset swapped from their - * usual naming, we use the proper names here to avoid confusion when - * reading the code. 
- */ -enum mipi_seq { - MIPI_SEQ_END = 0, - MIPI_SEQ_DEASSERT_RESET, /* Spec says MipiAssertResetPin */ - MIPI_SEQ_INIT_OTP, - MIPI_SEQ_DISPLAY_ON, - MIPI_SEQ_DISPLAY_OFF, - MIPI_SEQ_ASSERT_RESET, /* Spec says MipiDeassertResetPin */ - MIPI_SEQ_BACKLIGHT_ON, /* sequence block v2+ */ - MIPI_SEQ_BACKLIGHT_OFF, /* sequence block v2+ */ - MIPI_SEQ_TEAR_ON, /* sequence block v2+ */ - MIPI_SEQ_TEAR_OFF, /* sequence block v3+ */ - MIPI_SEQ_POWER_ON, /* sequence block v3+ */ - MIPI_SEQ_POWER_OFF, /* sequence block v3+ */ - MIPI_SEQ_MAX -}; - -enum mipi_seq_element { - MIPI_SEQ_ELEM_END = 0, - MIPI_SEQ_ELEM_SEND_PKT, - MIPI_SEQ_ELEM_DELAY, - MIPI_SEQ_ELEM_GPIO, - MIPI_SEQ_ELEM_I2C, /* sequence block v2+ */ - MIPI_SEQ_ELEM_SPI, /* sequence block v3+ */ - MIPI_SEQ_ELEM_PMIC, /* sequence block v3+ */ - MIPI_SEQ_ELEM_MAX -}; - -#define MIPI_DSI_UNDEFINED_PANEL_ID 0 -#define MIPI_DSI_GENERIC_PANEL_ID 1 - -struct mipi_config { - u16 panel_id; - - /* General Params */ - u32 enable_dithering:1; - u32 rsvd1:1; - u32 is_bridge:1; - - u32 panel_arch_type:2; - u32 is_cmd_mode:1; - -#define NON_BURST_SYNC_PULSE 0x1 -#define NON_BURST_SYNC_EVENTS 0x2 -#define BURST_MODE 0x3 - u32 video_transfer_mode:2; - - u32 cabc_supported:1; -#define PPS_BLC_PMIC 0 -#define PPS_BLC_SOC 1 - u32 pwm_blc:1; - - /* Bit 13:10 */ -#define PIXEL_FORMAT_RGB565 0x1 -#define PIXEL_FORMAT_RGB666 0x2 -#define PIXEL_FORMAT_RGB666_LOOSELY_PACKED 0x3 -#define PIXEL_FORMAT_RGB888 0x4 - u32 videomode_color_format:4; - - /* Bit 15:14 */ -#define ENABLE_ROTATION_0 0x0 -#define ENABLE_ROTATION_90 0x1 -#define ENABLE_ROTATION_180 0x2 -#define ENABLE_ROTATION_270 0x3 - u32 rotation:2; - u32 bta_enabled:1; - u32 rsvd2:15; - - /* 2 byte Port Description */ -#define DUAL_LINK_NOT_SUPPORTED 0 -#define DUAL_LINK_FRONT_BACK 1 -#define DUAL_LINK_PIXEL_ALT 2 - u16 dual_link:2; - u16 lane_cnt:2; - u16 pixel_overlap:3; - u16 rgb_flip:1; -#define DL_DCS_PORT_A 0x00 -#define DL_DCS_PORT_C 0x01 -#define DL_DCS_PORT_A_AND_C 0x02 - u16 dl_dcs_cabc_ports:2; - u16 dl_dcs_backlight_ports:2; - u16 rsvd3:4; - - u16 rsvd4; - - u8 rsvd5; - u32 target_burst_mode_freq; - u32 dsi_ddr_clk; - u32 bridge_ref_clk; - -#define BYTE_CLK_SEL_20MHZ 0 -#define BYTE_CLK_SEL_10MHZ 1 -#define BYTE_CLK_SEL_5MHZ 2 - u8 byte_clk_sel:2; - - u8 rsvd6:6; - - /* DPHY Flags */ - u16 dphy_param_valid:1; - u16 eot_pkt_disabled:1; - u16 enable_clk_stop:1; - u16 rsvd7:13; - - u32 hs_tx_timeout; - u32 lp_rx_timeout; - u32 turn_around_timeout; - u32 device_reset_timer; - u32 master_init_timer; - u32 dbi_bw_timer; - u32 lp_byte_clk_val; - - /* 4 byte Dphy Params */ - u32 prepare_cnt:6; - u32 rsvd8:2; - u32 clk_zero_cnt:8; - u32 trail_cnt:5; - u32 rsvd9:3; - u32 exit_zero_cnt:6; - u32 rsvd10:2; - - u32 clk_lane_switch_cnt; - u32 hl_switch_cnt; - - u32 rsvd11[6]; - - /* timings based on dphy spec */ - u8 tclk_miss; - u8 tclk_post; - u8 rsvd12; - u8 tclk_pre; - u8 tclk_prepare; - u8 tclk_settle; - u8 tclk_term_enable; - u8 tclk_trail; - u16 tclk_prepare_clkzero; - u8 rsvd13; - u8 td_term_enable; - u8 teot; - u8 ths_exit; - u8 ths_prepare; - u16 ths_prepare_hszero; - u8 rsvd14; - u8 ths_settle; - u8 ths_skip; - u8 ths_trail; - u8 tinit; - u8 tlpx; - u8 rsvd15[3]; - - /* GPIOs */ - u8 panel_enable; - u8 bl_enable; - u8 pwm_enable; - u8 reset_r_n; - u8 pwr_down_r; - u8 stdby_r_n; - -} __packed; - -/* all delays have a unit of 100us */ -struct mipi_pps_data { - u16 panel_on_delay; - u16 bl_enable_delay; - u16 bl_disable_delay; - u16 panel_off_delay; - u16 panel_power_cycle_delay; -} __packed; - void 
intel_bios_init(struct intel_display *display); void intel_bios_init_panel_early(struct intel_display *display, struct intel_panel *panel, @@ -259,6 +85,8 @@ bool intel_bios_encoder_is_lspcon(const struct intel_bios_encoder_data *devdata) bool intel_bios_encoder_lane_reversal(const struct intel_bios_encoder_data *devdata); bool intel_bios_encoder_hpd_invert(const struct intel_bios_encoder_data *devdata); enum port intel_bios_encoder_port(const struct intel_bios_encoder_data *devdata); +bool intel_bios_encoder_reject_edp_rate(const struct intel_bios_encoder_data *devdata, + int rate); enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata); diff --git a/drivers/gpu/drm/i915/display/intel_bo.c b/drivers/gpu/drm/i915/display/intel_bo.c index 65d64f79a4bd..d29c1508ccb9 100644 --- a/drivers/gpu/drm/i915/display/intel_bo.c +++ b/drivers/gpu/drm/i915/display/intel_bo.c @@ -2,7 +2,7 @@ /* Copyright © 2024 Intel Corporation */ #include <drm/drm_panic.h> -#include "display/intel_display_types.h" + #include "gem/i915_gem_mman.h" #include "gem/i915_gem_object.h" #include "gem/i915_gem_object_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index d29a755612de..ac6da20d9529 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -359,7 +359,7 @@ static int icl_get_qgv_points(struct intel_display *display, for (i = 0; i < qi->num_psf_points; i++) drm_dbg_kms(display->drm, - "PSF GV %d: CLK=%d \n", + "PSF GV %d: CLK=%d\n", i, qi->psf_points[i].clk); } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 228aa64c1349..9725eebe5706 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -22,6 +22,7 @@ */ #include <linux/debugfs.h> +#include <linux/iopoll.h> #include <linux/time.h> #include <drm/drm_fixed.h> @@ -31,6 +32,7 @@ #include "hsw_ips.h" #include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_bw.h" @@ -672,6 +674,7 @@ static void vlv_set_cdclk(struct intel_display *display, int cdclk = cdclk_config->cdclk; u32 val, cmd = cdclk_config->voltage_level; intel_wakeref_t wakeref; + int ret; switch (cdclk) { case 400000: @@ -702,12 +705,12 @@ static void vlv_set_cdclk(struct intel_display *display, val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); vlv_punit_write(display->drm, PUNIT_REG_DSPSSPM, val); - if (wait_for((vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM) & - DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), - 50)) { - drm_err(display->drm, - "timed out waiting for CDclk change\n"); - } + + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM), + (val & DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), + 500, 50 * 1000, false); + if (ret) + drm_err(display->drm, "timed out waiting for CDCLK change\n"); if (cdclk == 400000) { u32 divider; @@ -721,11 +724,11 @@ static void vlv_set_cdclk(struct intel_display *display, val |= divider; vlv_cck_write(display->drm, CCK_DISPLAY_CLOCK_CONTROL, val); - if (wait_for((vlv_cck_read(display->drm, CCK_DISPLAY_CLOCK_CONTROL) & - CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), - 50)) - drm_err(display->drm, - "timed out waiting for CDclk change\n"); 
+ ret = poll_timeout_us(val = vlv_cck_read(display->drm, CCK_DISPLAY_CLOCK_CONTROL), + (val & CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), + 500, 50 * 1000, false); + if (ret) + drm_err(display->drm, "timed out waiting for CDCLK change\n"); } /* adjust self-refresh exit latency value */ @@ -761,6 +764,7 @@ static void chv_set_cdclk(struct intel_display *display, int cdclk = cdclk_config->cdclk; u32 val, cmd = cdclk_config->voltage_level; intel_wakeref_t wakeref; + int ret; switch (cdclk) { case 333333: @@ -786,12 +790,12 @@ static void chv_set_cdclk(struct intel_display *display, val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); vlv_punit_write(display->drm, PUNIT_REG_DSPSSPM, val); - if (wait_for((vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM) & - DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), - 50)) { - drm_err(display->drm, - "timed out waiting for CDclk change\n"); - } + + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM), + (val & DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), + 500, 50 * 1000, false); + if (ret) + drm_err(display->drm, "timed out waiting for CDCLK change\n"); vlv_punit_put(display->drm); @@ -903,8 +907,10 @@ static void bdw_set_cdclk(struct intel_display *display, * According to the spec, it should be enough to poll for this 1 us. * However, extensive testing shows that this can take longer. */ - if (wait_for_us(intel_de_read(display, LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 100)) + ret = intel_de_wait_custom(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, LCPLL_CD_SOURCE_FCLK_DONE, + 100, 0, NULL); + if (ret) drm_err(display->drm, "Switching to FCLK failed\n"); intel_de_rmw(display, LCPLL_CTL, @@ -913,8 +919,10 @@ static void bdw_set_cdclk(struct intel_display *display, intel_de_rmw(display, LCPLL_CTL, LCPLL_CD_SOURCE_FCLK, 0); - if (wait_for_us((intel_de_read(display, LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + ret = intel_de_wait_custom(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, 0, + 1, 0, NULL); + if (ret) drm_err(display->drm, "Switching back to LCPLL failed\n"); intel_pcode_write(display->drm, HSW_PCODE_DE_WRITE_FREQ_REQ, @@ -3569,7 +3577,7 @@ static int i9xx_hrawclk(struct intel_display *display) struct drm_i915_private *i915 = to_i915(display->drm); /* hrawclock is 1/4 the FSB frequency */ - return DIV_ROUND_CLOSEST(i9xx_fsb_freq(i915), 4); + return DIV_ROUND_CLOSEST(intel_fsb_freq(i915), 4); } /** @@ -3622,9 +3630,7 @@ DEFINE_SHOW_ATTRIBUTE(i915_cdclk_info); void intel_cdclk_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; - - debugfs_create_file("i915_cdclk_info", 0444, minor->debugfs_root, + debugfs_create_file("i915_cdclk_info", 0444, display->drm->debugfs_root, display, &i915_cdclk_info_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c index 42c923f416b3..6a55854db5b6 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -77,7 +77,7 @@ void intel_connector_cancel_modeset_retry_work(struct intel_connector *connector drm_connector_put(&connector->base); } -int intel_connector_init(struct intel_connector *connector) +static int intel_connector_init(struct intel_connector *connector) { struct intel_digital_connector_state *conn_state; diff --git a/drivers/gpu/drm/i915/display/intel_connector.h b/drivers/gpu/drm/i915/display/intel_connector.h index aafb25a814fa..0aa86626e646 
100644 --- a/drivers/gpu/drm/i915/display/intel_connector.h +++ b/drivers/gpu/drm/i915/display/intel_connector.h @@ -14,7 +14,6 @@ struct i2c_adapter; struct intel_connector; struct intel_encoder; -int intel_connector_init(struct intel_connector *connector); struct intel_connector *intel_connector_alloc(void); void intel_connector_free(struct intel_connector *connector); void intel_connector_destroy(struct drm_connector *connector); diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 898c5d9e8f7a..31e68047f217 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -50,6 +50,7 @@ #include "intel_gmbus.h" #include "intel_hotplug.h" #include "intel_hotplug_irq.h" +#include "intel_link_bw.h" #include "intel_load_detect.h" #include "intel_pch_display.h" #include "intel_pch_refclk.h" @@ -421,7 +422,7 @@ static int pch_crt_compute_config(struct intel_encoder *encoder, return -EINVAL; crtc_state->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(crtc_state)) + if (!intel_link_bw_compute_pipe_bpp(crtc_state)) return -EINVAL; crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; @@ -446,7 +447,7 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, return -EINVAL; crtc_state->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(crtc_state)) + if (!intel_link_bw_compute_pipe_bpp(crtc_state)) return -EINVAL; crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 198e69efe9ac..d4d181f9dca5 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -33,17 +33,9 @@ static const u32 intel_cursor_formats[] = { DRM_FORMAT_ARGB8888, }; -static u32 intel_cursor_base(const struct intel_plane_state *plane_state) +static u32 intel_cursor_surf_offset(const struct intel_plane_state *plane_state) { - struct intel_display *display = to_intel_display(plane_state); - u32 base; - - if (DISPLAY_INFO(display)->cursor_needs_physical) - base = plane_state->phys_dma_addr; - else - base = intel_plane_ggtt_offset(plane_state); - - return base + plane_state->view.color_plane[0].offset; + return plane_state->view.color_plane[0].offset; } static u32 intel_cursor_position(const struct intel_crtc_state *crtc_state, @@ -213,8 +205,7 @@ static u32 i845_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) return cntl; } -static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i845_cursor_ctl(const struct intel_plane_state *plane_state) { return CURSOR_ENABLE | CURSOR_FORMAT_ARGB | @@ -274,7 +265,7 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state, return -EINVAL; } - plane_state->ctl = i845_cursor_ctl(crtc_state, plane_state); + plane_state->ctl = i845_cursor_ctl(plane_state); return 0; } @@ -297,7 +288,7 @@ static void i845_cursor_update_arm(struct intel_dsb *dsb, size = CURSOR_HEIGHT(height) | CURSOR_WIDTH(width); - base = intel_cursor_base(plane_state); + base = plane_state->surf; pos = intel_cursor_position(crtc_state, plane_state, false); } @@ -406,8 +397,7 @@ static u32 i9xx_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) return cntl; } -static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i9xx_cursor_ctl(const struct intel_plane_state *plane_state) { struct 
intel_display *display = to_intel_display(plane_state); u32 cntl = 0; @@ -534,7 +524,7 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, return -EINVAL; } - plane_state->ctl = i9xx_cursor_ctl(crtc_state, plane_state); + plane_state->ctl = i9xx_cursor_ctl(plane_state); return 0; } @@ -675,7 +665,7 @@ static void i9xx_cursor_update_arm(struct intel_dsb *dsb, if (width != height) fbc_ctl = CUR_FBC_EN | CUR_FBC_HEIGHT(height - 1); - base = intel_cursor_base(plane_state); + base = plane_state->surf; pos = intel_cursor_position(crtc_state, plane_state, false); } @@ -1051,6 +1041,8 @@ intel_cursor_plane_create(struct intel_display *display, cursor->check_plane = i9xx_check_cursor; } + cursor->surf_offset = intel_cursor_surf_offset; + if (DISPLAY_VER(display) >= 5 || display->platform.g4x) cursor->capture_error = g4x_cursor_capture_error; else diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 0405396c7750..46017091bb0b 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2166,7 +2166,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, { struct intel_display *display = to_intel_display(crtc_state); enum tc_port tc_port = intel_encoder_to_tc(&dig_port->base); - u32 ln0, ln1, pin_assignment; + enum intel_tc_pin_assignment pin_assignment; + u32 ln0, ln1; u8 width; if (DISPLAY_VER(display) >= 14) @@ -2188,11 +2189,11 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); /* DPPATC */ - pin_assignment = intel_tc_port_get_pin_assignment_mask(dig_port); + pin_assignment = intel_tc_port_get_pin_assignment(dig_port); width = crtc_state->lane_count; switch (pin_assignment) { - case 0x0: + case INTEL_TC_PIN_ASSIGNMENT_NONE: drm_WARN_ON(display->drm, !intel_tc_port_in_legacy_mode(dig_port)); if (width == 1) { @@ -2202,20 +2203,20 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - case 0x1: + case INTEL_TC_PIN_ASSIGNMENT_A: if (width == 4) { ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - case 0x2: + case INTEL_TC_PIN_ASSIGNMENT_B: if (width == 2) { ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - case 0x3: - case 0x5: + case INTEL_TC_PIN_ASSIGNMENT_C: + case INTEL_TC_PIN_ASSIGNMENT_E: if (width == 1) { ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; @@ -2224,8 +2225,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - case 0x4: - case 0x6: + case INTEL_TC_PIN_ASSIGNMENT_D: + case INTEL_TC_PIN_ASSIGNMENT_F: if (width == 1) { ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; @@ -2339,34 +2340,24 @@ static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp, drm_dbg_kms(display->drm, "Failed to clear FEC detected flags\n"); } -static int read_fec_detected_status(struct drm_dp_aux *aux) -{ - int ret; - u8 status; - - ret = drm_dp_dpcd_readb(aux, DP_FEC_STATUS, &status); - if (ret < 0) - return ret; - - return status; -} - static int wait_for_fec_detected(struct drm_dp_aux *aux, bool enabled) { struct intel_display *display = to_intel_display(aux->drm_dev); int mask = enabled ? 
DP_FEC_DECODE_EN_DETECTED : DP_FEC_DECODE_DIS_DETECTED; - int status; - int err; + u8 status = 0; + int ret, err; - err = readx_poll_timeout(read_fec_detected_status, aux, status, - status & mask || status < 0, - 10000, 200000); + ret = poll_timeout_us(err = drm_dp_dpcd_read_byte(aux, DP_FEC_STATUS, &status), + err || (status & mask), + 10 * 1000, 200 * 1000, false); - if (err || status < 0) { + /* Either can be non-zero, but not both */ + ret = ret ?: err; + if (ret) { drm_dbg_kms(display->drm, - "Failed waiting for FEC %s to get detected: %d (status %d)\n", - str_enabled_disabled(enabled), err, status); - return err ? err : status; + "Failed waiting for FEC %s to get detected: %d (status 0x%02x)\n", + str_enabled_disabled(enabled), ret, status); + return ret; } return 0; @@ -2561,6 +2552,7 @@ mtl_ddi_enable_d2d(struct intel_encoder *encoder) enum port port = encoder->port; i915_reg_t reg; u32 set_bits, wait_bits; + int ret; if (DISPLAY_VER(display) < 14) return; @@ -2576,7 +2568,11 @@ mtl_ddi_enable_d2d(struct intel_encoder *encoder) } intel_de_rmw(display, reg, 0, set_bits); - if (wait_for_us(intel_de_read(display, reg) & wait_bits, 100)) { + + ret = intel_de_wait_custom(display, reg, + wait_bits, wait_bits, + 100, 0, NULL); + if (ret) { drm_err(display->drm, "Timeout waiting for D2D Link enable for DDI/PORT_BUF_CTL %c\n", port_name(port)); } @@ -3058,6 +3054,7 @@ mtl_ddi_disable_d2d(struct intel_encoder *encoder) enum port port = encoder->port; i915_reg_t reg; u32 clr_bits, wait_bits; + int ret; if (DISPLAY_VER(display) < 14) return; @@ -3073,7 +3070,11 @@ mtl_ddi_disable_d2d(struct intel_encoder *encoder) } intel_de_rmw(display, reg, clr_bits, 0); - if (wait_for_us(!(intel_de_read(display, reg) & wait_bits), 100)) + + ret = intel_de_wait_custom(display, reg, + wait_bits, 0, + 100, 0, NULL); + if (ret) drm_err(display->drm, "Timeout waiting for D2D Link disable for DDI/PORT_BUF_CTL %c\n", port_name(port)); } @@ -5148,12 +5149,10 @@ void intel_ddi_init(struct intel_display *display, phy_name(phy)); } - dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); + dig_port = intel_dig_port_alloc(); if (!dig_port) return; - dig_port->aux_ch = AUX_CH_NONE; - encoder = &dig_port->base; encoder->devdata = devdata; @@ -5191,9 +5190,6 @@ void intel_ddi_init(struct intel_display *display, intel_encoder_link_check_init(encoder, intel_ddi_link_check); - mutex_init(&dig_port->hdcp.mutex); - dig_port->hdcp.num_streams = 0; - encoder->hotplug = intel_ddi_hotplug; encoder->compute_output_type = intel_ddi_compute_output_type; encoder->compute_config = intel_ddi_compute_config; @@ -5331,7 +5327,6 @@ void intel_ddi_init(struct intel_display *display, dig_port->ddi_a_4_lanes = DISPLAY_VER(display) < 11 && ddi_buf_ctl & DDI_A_4_LANES; - dig_port->dp.output_reg = INVALID_MMIO_REG; dig_port->max_lanes = intel_ddi_max_lanes(dig_port); if (need_aux_ch(encoder, init_dp)) { diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 7035c1fc9033..c1a3a95c65f0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -76,6 +76,7 @@ #include "intel_display_regs.h" #include "intel_display_rpm.h" #include "intel_display_types.h" +#include "intel_display_wa.h" #include "intel_dmc.h" #include "intel_dp.h" #include "intel_dp_link_training.h" @@ -1081,6 +1082,11 @@ static void intel_post_plane_update(struct intel_atomic_state *state, if (audio_enabling(old_crtc_state, new_crtc_state)) intel_encoders_audio_enable(state, 
crtc); + if (intel_display_wa(display, 14011503117)) { + if (old_crtc_state->pch_pfit.enabled != new_crtc_state->pch_pfit.enabled) + adl_scaler_ecc_unmask(new_crtc_state); + } + intel_alpm_post_plane_update(state, crtc); intel_psr_post_plane_update(state, crtc); diff --git a/drivers/gpu/drm/i915/display/intel_display_conversion.c b/drivers/gpu/drm/i915/display/intel_display_conversion.c index 4d565935e2cc..d56065f22655 100644 --- a/drivers/gpu/drm/i915/display/intel_display_conversion.c +++ b/drivers/gpu/drm/i915/display/intel_display_conversion.c @@ -4,7 +4,7 @@ #include "i915_drv.h" #include "intel_display_conversion.h" -struct intel_display *__i915_to_display(struct drm_i915_private *i915) +static struct intel_display *__i915_to_display(struct drm_i915_private *i915) { return i915->display; } diff --git a/drivers/gpu/drm/i915/display/intel_display_conversion.h b/drivers/gpu/drm/i915/display/intel_display_conversion.h index 46c7208d42ba..d497bc58a73f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_conversion.h +++ b/drivers/gpu/drm/i915/display/intel_display_conversion.h @@ -9,20 +9,8 @@ #define __INTEL_DISPLAY_CONVERSION__ struct drm_device; -struct drm_i915_private; struct intel_display; -struct intel_display *__i915_to_display(struct drm_i915_private *i915); struct intel_display *__drm_to_display(struct drm_device *drm); -/* - * Transitional macro to optionally convert struct drm_i915_private * to struct - * intel_display *, also accepting the latter. - */ -#define __to_intel_display(p) \ - _Generic(p, \ - const struct drm_i915_private *: __i915_to_display((struct drm_i915_private *)(p)), \ - struct drm_i915_private *: __i915_to_display((struct drm_i915_private *)(p)), \ - const struct intel_display *: (p), \ - struct intel_display *: (p)) #endif /* __INTEL_DISPLAY_CONVERSION__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index ce3f9810c42d..10dddec3796f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -820,14 +820,14 @@ static const struct drm_info_list intel_display_debugfs_list[] = { void intel_display_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; - debugfs_create_file("i915_fifo_underrun_reset", 0644, minor->debugfs_root, + debugfs_create_file("i915_fifo_underrun_reset", 0644, debugfs_root, display, &i915_fifo_underrun_reset_ops); drm_debugfs_create_files(intel_display_debugfs_list, ARRAY_SIZE(intel_display_debugfs_list), - minor->debugfs_root, minor); + debugfs_root, display->drm->primary); intel_bios_debugfs_register(display); intel_cdclk_debugfs_register(display); diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c index 88914a1f3f62..de62b774272d 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c @@ -7,7 +7,6 @@ #include <linux/kernel.h> #include <drm/drm_drv.h> -#include <drm/drm_file.h> #include "intel_display_core.h" #include "intel_display_debugfs_params.h" @@ -154,14 +153,14 @@ intel_display_debugfs_create_uint(const char *name, umode_t mode, /* add a subdirectory with files for each intel display param */ void intel_display_debugfs_params(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + 
struct dentry *debugfs_root = display->drm->debugfs_root; struct dentry *dir; char dirname[16]; snprintf(dirname, sizeof(dirname), "%s_params", display->drm->driver->name); - dir = debugfs_lookup(dirname, minor->debugfs_root); + dir = debugfs_lookup(dirname, debugfs_root); if (!dir) - dir = debugfs_create_dir(dirname, minor->debugfs_root); + dir = debugfs_create_dir(dirname, debugfs_root); if (IS_ERR(dir)) return; diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 089cffabbad5..65f0efc35bb7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1354,6 +1354,19 @@ static const struct intel_display_device_info xe2_lpd_display = { .__runtime_defaults.has_dbuf_overlap_detection = true, }; +static const struct intel_display_device_info wcl_display = { + XE_LPDP_FEATURES, + + .__runtime_defaults.cpu_transcoder_mask = + BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), + .__runtime_defaults.pipe_mask = + BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .__runtime_defaults.fbc_mask = + BIT(INTEL_FBC_A) | BIT(INTEL_FBC_B) | BIT(INTEL_FBC_C), + .__runtime_defaults.port_mask = + BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_TC1) | BIT(PORT_TC2), +}; + static const struct intel_display_device_info xe2_hpd_display = { XE_LPDP_FEATURES, .__runtime_defaults.port_mask = BIT(PORT_A) | @@ -1480,7 +1493,7 @@ static const struct { { 14, 1, &xe2_hpd_display }, { 20, 0, &xe2_lpd_display }, { 30, 0, &xe2_lpd_display }, - { 30, 2, &xe2_lpd_display }, + { 30, 2, &wcl_display }, }; static const struct intel_display_device_info * diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 4308822f0415..6e87b763fe7c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -9,7 +9,6 @@ #include <linux/bitops.h> #include <linux/types.h> -#include "intel_display_conversion.h" #include "intel_display_limits.h" struct drm_printer; @@ -224,8 +223,8 @@ struct intel_display_platforms { (IS_DISPLAY_VERx100((__display), (ipver), (ipver)) && \ IS_DISPLAY_STEP((__display), (from), (until))) -#define DISPLAY_INFO(__display) (__to_intel_display(__display)->info.__device_info) -#define DISPLAY_RUNTIME_INFO(__display) (&__to_intel_display(__display)->info.__runtime_info) +#define DISPLAY_INFO(__display) ((__display)->info.__device_info) +#define DISPLAY_RUNTIME_INFO(__display) (&(__display)->info.__runtime_info) #define DISPLAY_VER(__display) (DISPLAY_RUNTIME_INFO(__display)->ip.ver) #define DISPLAY_VERx100(__display) (DISPLAY_RUNTIME_INFO(__display)->ip.ver * 100 + \ @@ -236,7 +235,7 @@ struct intel_display_platforms { #define INTEL_DISPLAY_STEP(__display) (DISPLAY_RUNTIME_INFO(__display)->step) #define IS_DISPLAY_STEP(__display, since, until) \ - (drm_WARN_ON(__to_intel_display(__display)->drm, INTEL_DISPLAY_STEP(__display) == STEP_NONE), \ + (drm_WARN_ON((__display)->drm, INTEL_DISPLAY_STEP(__display) == STEP_NONE), \ INTEL_DISPLAY_STEP(__display) >= (since) && INTEL_DISPLAY_STEP(__display) < (until)) #define ARLS_HOST_BRIDGE_PCI_ID1 0x7D1C diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 8586ba102605..cf1c14412abe 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -18,6 +18,7 @@ #include <drm/drm_vblank.h> 
#include "i915_drv.h" +#include "i915_utils.h" #include "i9xx_wm.h" #include "intel_acpi.h" #include "intel_atomic.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index fb25ec8adae3..123e054affbe 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1506,10 +1506,14 @@ u32 gen11_gu_misc_irq_ack(struct intel_display *display, const u32 master_ctl) if (!(master_ctl & GEN11_GU_MISC_IRQ)) return 0; + intel_display_rpm_assert_block(display); + iir = intel_de_read(display, GEN11_GU_MISC_IIR); if (likely(iir)) intel_de_write(display, GEN11_GU_MISC_IIR, iir); + intel_display_rpm_assert_unblock(display); + return iir; } @@ -1986,20 +1990,17 @@ void vlv_display_irq_postinstall(struct intel_display *display) void ibx_display_irq_reset(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (HAS_PCH_NOP(i915)) + if (HAS_PCH_NOP(display)) return; gen2_irq_reset(to_intel_uncore(display->drm), SDE_IRQ_REGS); - if (HAS_PCH_CPT(i915) || HAS_PCH_LPT(i915)) + if (HAS_PCH_CPT(display) || HAS_PCH_LPT(display)) intel_de_write(display, SERR_INT, 0xffffffff); } void gen8_display_irq_reset(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe; if (!HAS_DISPLAY(display)) @@ -2016,7 +2017,7 @@ void gen8_display_irq_reset(struct intel_display *display) intel_display_irq_regs_reset(display, GEN8_DE_PORT_IRQ_REGS); intel_display_irq_regs_reset(display, GEN8_DE_MISC_IRQ_REGS); - if (HAS_PCH_SPLIT(i915)) + if (HAS_PCH_SPLIT(display)) ibx_display_irq_reset(display); } diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c index 75316247ee8a..2aed110c5b09 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_params.c @@ -120,6 +120,9 @@ intel_display_param_named_unsafe(enable_psr, int, 0400, "(0=disabled, 1=enable up to PSR1, 2=enable up to PSR2) " "Default: -1 (use per-chip default)"); +intel_display_param_named_unsafe(enable_panel_replay, int, 0400, + "Enable Panel Replay (0=disabled, 1=enabled). Default: -1 (use per-chip default)"); + intel_display_param_named(psr_safest_params, bool, 0400, "Replace PSR VBT parameters by the safest and not optimal ones. 
This " "is helpful to detect if PSR issues are related to bad values set in " diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h index 784e6bae8615..b01bc5700c52 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.h +++ b/drivers/gpu/drm/i915/display/intel_display_params.h @@ -46,6 +46,7 @@ struct drm_printer; param(bool, enable_dp_mst, true, 0600) \ param(int, enable_fbc, -1, 0600) \ param(int, enable_psr, -1, 0600) \ + param(int, enable_panel_replay, -1, 0600) \ param(bool, psr_safest_params, false, 0400) \ param(bool, enable_psr2_sel_fetch, true, 0400) \ param(int, enable_dmc_wl, -1, 0400) \ diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 273054c22325..da4babfd6bcb 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -3,6 +3,7 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/iopoll.h> #include <linux/string_helpers.h> #include "soc/intel_dram.h" @@ -10,6 +11,7 @@ #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_backlight_regs.h" #include "intel_cdclk.h" #include "intel_clock_gating.h" @@ -1172,7 +1174,7 @@ static void icl_mbus_init(struct intel_display *display) if (DISPLAY_VER(display) == 12) abox_regs |= BIT(0); - for_each_set_bit(i, &abox_regs, sizeof(abox_regs)) + for_each_set_bit(i, &abox_regs, BITS_PER_TYPE(abox_regs)) intel_de_rmw(display, MBUS_ABOX_CTL(i), mask, val); } @@ -1278,6 +1280,7 @@ static void hsw_disable_lcpll(struct intel_display *display, bool switch_to_fclk, bool allow_power_down) { u32 val; + int ret; assert_can_disable_lcpll(display); @@ -1287,8 +1290,10 @@ static void hsw_disable_lcpll(struct intel_display *display, val |= LCPLL_CD_SOURCE_FCLK; intel_de_write(display, LCPLL_CTL, val); - if (wait_for_us(intel_de_read(display, LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) + ret = intel_de_wait_custom(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, LCPLL_CD_SOURCE_FCLK_DONE, + 1, 0, NULL); + if (ret) drm_err(display->drm, "Switching to FCLK failed\n"); val = intel_de_read(display, LCPLL_CTL); @@ -1306,8 +1311,10 @@ static void hsw_disable_lcpll(struct intel_display *display, hsw_write_dcomp(display, val); ndelay(100); - if (wait_for((hsw_read_dcomp(display) & - D_COMP_RCOMP_IN_PROGRESS) == 0, 1)) + ret = poll_timeout_us(val = hsw_read_dcomp(display), + (val & D_COMP_RCOMP_IN_PROGRESS) == 0, + 100, 1000, false); + if (ret) drm_err(display->drm, "D_COMP RCOMP still in progress\n"); if (allow_power_down) { @@ -1324,6 +1331,7 @@ static void hsw_restore_lcpll(struct intel_display *display) { struct drm_i915_private __maybe_unused *dev_priv = to_i915(display->drm); u32 val; + int ret; val = intel_de_read(display, LCPLL_CTL); @@ -1358,8 +1366,10 @@ static void hsw_restore_lcpll(struct intel_display *display) if (val & LCPLL_CD_SOURCE_FCLK) { intel_de_rmw(display, LCPLL_CTL, LCPLL_CD_SOURCE_FCLK, 0); - if (wait_for_us((intel_de_read(display, LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + ret = intel_de_wait_custom(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, 0, + 1, 0, NULL); + if (ret) drm_err(display->drm, "Switching back to LCPLL failed\n"); } @@ -1629,11 +1639,11 @@ static void tgl_bw_buddy_init(struct intel_display *display) if (table[config].page_mask == 0) { drm_dbg_kms(display->drm, "Unknown memory configuration; disabling address buddy logic.\n"); - 
for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) + for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask)) intel_de_write(display, BW_BUDDY_CTL(i), BW_BUDDY_DISABLE); } else { - for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) { + for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask)) { intel_de_write(display, BW_BUDDY_PAGE_MASK(i), table[config].page_mask); @@ -2155,8 +2165,6 @@ void intel_power_domains_resume(struct intel_display *display) power_domains->init_wakeref = intel_display_power_get(display, POWER_DOMAIN_INIT); } - - intel_power_domains_verify_state(display); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) diff --git a/drivers/gpu/drm/i915/display/intel_display_power_map.c b/drivers/gpu/drm/i915/display/intel_display_power_map.c index 77268802b55e..39b71fffa2cd 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_map.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_map.c @@ -1717,6 +1717,59 @@ static const struct i915_power_well_desc_list xe3lpd_power_wells[] = { I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica), }; +static const struct i915_power_well_desc wcl_power_wells_main[] = { + { + .instances = &I915_PW_INSTANCES( + I915_PW("PW_2", &xe3lpd_pwdoms_pw_2, + .hsw.idx = ICL_PW_CTL_IDX_PW_2, + .id = SKL_DISP_PW_2), + ), + .ops = &hsw_power_well_ops, + .has_vga = true, + .has_fuses = true, + }, { + .instances = &I915_PW_INSTANCES( + I915_PW("PW_A", &xelpd_pwdoms_pw_a, + .hsw.idx = XELPD_PW_CTL_IDX_PW_A), + ), + .ops = &hsw_power_well_ops, + .irq_pipe_mask = BIT(PIPE_A), + .has_fuses = true, + }, { + .instances = &I915_PW_INSTANCES( + I915_PW("PW_B", &xe3lpd_pwdoms_pw_b, + .hsw.idx = XELPD_PW_CTL_IDX_PW_B), + ), + .ops = &hsw_power_well_ops, + .irq_pipe_mask = BIT(PIPE_B), + .has_fuses = true, + }, { + .instances = &I915_PW_INSTANCES( + I915_PW("PW_C", &xe3lpd_pwdoms_pw_c, + .hsw.idx = XELPD_PW_CTL_IDX_PW_C), + ), + .ops = &hsw_power_well_ops, + .irq_pipe_mask = BIT(PIPE_C), + .has_fuses = true, + }, { + .instances = &I915_PW_INSTANCES( + I915_PW("AUX_A", &icl_pwdoms_aux_a, .xelpdp.aux_ch = AUX_CH_A), + I915_PW("AUX_B", &icl_pwdoms_aux_b, .xelpdp.aux_ch = AUX_CH_B), + I915_PW("AUX_TC1", &xelpdp_pwdoms_aux_tc1, .xelpdp.aux_ch = AUX_CH_USBC1), + I915_PW("AUX_TC2", &xelpdp_pwdoms_aux_tc2, .xelpdp.aux_ch = AUX_CH_USBC2), + ), + .ops = &xelpdp_aux_power_well_ops, + }, +}; + +static const struct i915_power_well_desc_list wcl_power_wells[] = { + I915_PW_DESCRIPTORS(i9xx_power_wells_always_on), + I915_PW_DESCRIPTORS(icl_power_wells_pw_1), + I915_PW_DESCRIPTORS(xe3lpd_power_wells_dcoff), + I915_PW_DESCRIPTORS(wcl_power_wells_main), + I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica), +}; + static void init_power_well_domains(const struct i915_power_well_instance *inst, struct i915_power_well *power_well) { @@ -1824,7 +1877,9 @@ int intel_display_power_map_init(struct i915_power_domains *power_domains) return 0; } - if (DISPLAY_VER(display) >= 30) + if (DISPLAY_VERx100(display) == 3002) + return set_power_wells(power_domains, wcl_power_wells); + else if (DISPLAY_VER(display) >= 30) return set_power_wells(power_domains, xe3lpd_power_wells); else if (DISPLAY_VER(display) >= 20) return set_power_wells(power_domains, xe2lpd_power_wells); diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index 48cac225a809..5e88b930f5aa 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel 
Corporation */ +#include <linux/iopoll.h> + #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" @@ -499,7 +501,6 @@ static void icl_tc_port_assert_ref_held(struct intel_display *display, static void icl_tc_cold_exit(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); int ret, tries = 0; while (1) { @@ -514,7 +515,7 @@ static void icl_tc_cold_exit(struct intel_display *display) msleep(1); /* TODO: turn failure into a error as soon i915 CI updates ICL IFWI */ - drm_dbg_kms(&i915->drm, "TC cold block %s\n", ret ? "failed" : + drm_dbg_kms(display->drm, "TC cold block %s\n", ret ? "failed" : "succeeded"); } @@ -527,6 +528,8 @@ icl_tc_phy_aux_power_well_enable(struct intel_display *display, const struct i915_power_well_regs *regs = power_well->desc->ops->regs; bool is_tbt = power_well->desc->is_tc_tbt; bool timeout_expected; + u32 val; + int ret; icl_tc_port_assert_ref_held(display, power_well, dig_port); @@ -553,10 +556,11 @@ icl_tc_phy_aux_power_well_enable(struct intel_display *display, tc_port = TGL_AUX_PW_TO_TC_PORT(i915_power_well_instance(power_well)->hsw.idx); - if (wait_for(intel_dkl_phy_read(display, DKL_CMN_UC_DW_27(tc_port)) & - DKL_CMN_UC_DW27_UC_HEALTH, 1)) - drm_warn(display->drm, - "Timeout waiting TC uC health\n"); + ret = poll_timeout_us(val = intel_dkl_phy_read(display, DKL_CMN_UC_DW_27(tc_port)), + val & DKL_CMN_UC_DW27_UC_HEALTH, + 100, 1000, false); + if (ret) + drm_warn(display->drm, "Timeout waiting TC uC health\n"); } } @@ -1122,6 +1126,8 @@ static void vlv_set_power_well(struct intel_display *display, u32 mask; u32 state; u32 ctrl; + u32 val; + int ret; mask = PUNIT_PWRGT_MASK(pw_idx); state = enable ? PUNIT_PWRGT_PWR_ON(pw_idx) : @@ -1129,10 +1135,8 @@ static void vlv_set_power_well(struct intel_display *display, vlv_punit_get(display->drm); -#define COND \ - ((vlv_punit_read(display->drm, PUNIT_REG_PWRGT_STATUS) & mask) == state) - - if (COND) + val = vlv_punit_read(display->drm, PUNIT_REG_PWRGT_STATUS); + if ((val & mask) == state) goto out; ctrl = vlv_punit_read(display->drm, PUNIT_REG_PWRGT_CTRL); @@ -1140,14 +1144,15 @@ static void vlv_set_power_well(struct intel_display *display, ctrl |= state; vlv_punit_write(display->drm, PUNIT_REG_PWRGT_CTRL, ctrl); - if (wait_for(COND, 100)) + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_PWRGT_STATUS), + (val & mask) == state, + 500, 100 * 1000, false); + if (ret) drm_err(display->drm, "timeout setting power well state %08x (%08x)\n", state, vlv_punit_read(display->drm, PUNIT_REG_PWRGT_CTRL)); -#undef COND - out: vlv_punit_put(display->drm); } @@ -1208,7 +1213,7 @@ static void vlv_init_display_clock_gating(struct intel_display *display) * (and never recovering) in this case. intel_dsi_post_disable() will * clear it when we turn off the display. */ - intel_de_rmw(display, DSPCLK_GATE_D(display), + intel_de_rmw(display, VLV_DSPCLK_GATE_D, ~DPOUNIT_CLOCK_GATE_DISABLE, VRHUNIT_CLOCK_GATE_DISABLE); /* @@ -1711,23 +1716,24 @@ static void chv_set_pipe_power_well(struct intel_display *display, enum pipe pipe = PIPE_A; u32 state; u32 ctrl; + int ret; state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); vlv_punit_get(display->drm); -#define COND \ - ((vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe)) == state) - - if (COND) + ctrl = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM); + if ((ctrl & DP_SSS_MASK(pipe)) == state) goto out; - ctrl = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM); ctrl &= ~DP_SSC_MASK(pipe); ctrl |= enable ? 
DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe); vlv_punit_write(display->drm, PUNIT_REG_DSPSSPM, ctrl); - if (wait_for(COND, 100)) + ret = poll_timeout_us(ctrl = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM), + (ctrl & DP_SSS_MASK(pipe)) == state, + 500, 100 * 1000, false); + if (ret) drm_err(display->drm, "timeout setting power well state %08x (%08x)\n", state, @@ -1765,7 +1771,6 @@ static void chv_pipe_power_well_disable(struct intel_display *display, static void tgl_tc_cold_request(struct intel_display *display, bool block) { - struct drm_i915_private *i915 = to_i915(display->drm); u8 tries = 0; int ret; @@ -1798,10 +1803,9 @@ tgl_tc_cold_request(struct intel_display *display, bool block) } if (ret) - drm_err(&i915->drm, "TC cold %sblock failed\n", - block ? "" : "un"); + drm_err(display->drm, "TC cold %sblock failed\n", block ? "" : "un"); else - drm_dbg_kms(&i915->drm, "TC cold %sblock succeeded\n", + drm_dbg_kms(display->drm, "TC cold %sblock succeeded\n", block ? "" : "un"); } diff --git a/drivers/gpu/drm/i915/display/intel_display_regs.h b/drivers/gpu/drm/i915/display/intel_display_regs.h index 7bd09d981cd2..9d71e26a4fa2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_regs.h +++ b/drivers/gpu/drm/i915/display/intel_display_regs.h @@ -2890,6 +2890,7 @@ enum skl_power_gate { #define DP_PIN_ASSIGNMENT_SHIFT(idx) ((idx) * 4) #define DP_PIN_ASSIGNMENT_MASK(idx) (0xf << ((idx) * 4)) #define DP_PIN_ASSIGNMENT(idx, x) ((x) << ((idx) * 4)) +/* See enum intel_tc_pin_assignment for the pin assignment field values. */ #define _TCSS_DDI_STATUS_1 0x161500 #define _TCSS_DDI_STATUS_2 0x161504 @@ -2897,6 +2898,7 @@ enum skl_power_gate { _TCSS_DDI_STATUS_1, \ _TCSS_DDI_STATUS_2)) #define TCSS_DDI_STATUS_PIN_ASSIGNMENT_MASK REG_GENMASK(28, 25) +/* See enum intel_tc_pin_assignment for the pin assignment field values. 
*/ #define TCSS_DDI_STATUS_READY REG_BIT(2) #define TCSS_DDI_STATUS_HPD_LIVE_STATUS_TBT REG_BIT(1) #define TCSS_DDI_STATUS_HPD_LIVE_STATUS_ALT REG_BIT(0) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ce45261c4a8f..fd9d2527889b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -50,14 +50,15 @@ #include "intel_display_limits.h" #include "intel_display_power.h" #include "intel_dpll_mgr.h" +#include "intel_dsi_vbt_defs.h" #include "intel_wm_types.h" struct cec_notifier; struct drm_printer; -struct __intel_global_objs_state; struct intel_connector; struct intel_ddi_buf_trans; struct intel_fbc; +struct intel_global_objs_state; struct intel_hdcp_shim; struct intel_tc_port; @@ -593,7 +594,7 @@ struct intel_atomic_state { struct ref_tracker *wakeref; - struct __intel_global_objs_state *global_objs; + struct intel_global_objs_state *global_objs; int num_global_objs; /* Internal commit, as opposed to userspace/client initiated one */ @@ -642,7 +643,6 @@ struct intel_plane_state { #define PLANE_HAS_FENCE BIT(0) struct intel_fb_view view; - u32 phys_dma_addr; /* for cursor_needs_physical */ /* for legacy cursor fb unpin */ struct drm_vblank_work unpin_work; @@ -665,6 +665,9 @@ struct intel_plane_state { /* chroma upsampler control register */ u32 cus_ctl; + /* surface address register */ + u32 surf; + /* * scaler_id * = -1 : not using a scaler @@ -941,10 +944,6 @@ struct intel_csc_matrix { u16 postoff[3]; }; -void intel_io_mmio_fw_write(void *ctx, i915_reg_t reg, u32 val); - -typedef void (*intel_io_reg_write)(void *ctx, i915_reg_t reg, u32 val); - struct intel_crtc_state { /* * uapi (drm) state. This is the software state shown to userspace. @@ -1122,6 +1121,7 @@ struct intel_crtc_state { bool req_psr2_sdp_prior_scanline; bool has_panel_replay; bool wm_level_disabled; + bool pkg_c_latency_used; u32 dc3co_exitline; u16 su_y_granularity; u8 active_non_psr_pipes; @@ -1534,6 +1534,7 @@ struct intel_plane { bool (*get_hw_state)(struct intel_plane *plane, enum pipe *pipe); int (*check_plane)(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); + u32 (*surf_offset)(const struct intel_plane_state *plane_state); int (*min_cdclk)(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); void (*async_flip)(struct intel_dsb *dsb, @@ -1683,6 +1684,7 @@ struct intel_psr { u8 entry_setup_frames; bool link_ok; + bool pkg_c_latency_used; u8 active_non_psr_pipes; }; diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.c b/drivers/gpu/drm/i915/display/intel_display_wa.c index f57280e9d041..31cd2c9cd488 100644 --- a/drivers/gpu/drm/i915/display/intel_display_wa.c +++ b/drivers/gpu/drm/i915/display/intel_display_wa.c @@ -3,6 +3,8 @@ * Copyright © 2023 Intel Corporation */ +#include <drm/drm_print.h> + #include "i915_reg.h" #include "intel_de.h" #include "intel_display_core.h" @@ -39,3 +41,36 @@ void intel_display_wa_apply(struct intel_display *display) else if (DISPLAY_VER(display) == 11) gen11_display_wa_apply(display); } + +/* + * Wa_16025573575: + * Fixes: Issue with bitbashing on Xe3 based platforms. + * Workaround: Set masks bits in GPIO CTL and preserve it during bitbashing sequence. 
+ */ +static bool intel_display_needs_wa_16025573575(struct intel_display *display) +{ + return DISPLAY_VERx100(display) == 3000 || DISPLAY_VERx100(display) == 3002; +} + +/* + * Wa_14011503117: + * Fixes: Before enabling the scaler DE fatal error is masked + * Workaround: Unmask the DE fatal error register after enabling the scaler + * and after waiting of at least 1 frame. + */ +bool __intel_display_wa(struct intel_display *display, enum intel_display_wa wa, const char *name) +{ + switch (wa) { + case INTEL_DISPLAY_WA_16023588340: + return intel_display_needs_wa_16023588340(display); + case INTEL_DISPLAY_WA_16025573575: + return intel_display_needs_wa_16025573575(display); + case INTEL_DISPLAY_WA_14011503117: + return DISPLAY_VER(display) == 13; + default: + drm_WARN(display->drm, 1, "Missing Wa number: %s\n", name); + break; + } + + return false; +} diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.h b/drivers/gpu/drm/i915/display/intel_display_wa.h index babd9d16603d..abc1df83f066 100644 --- a/drivers/gpu/drm/i915/display/intel_display_wa.h +++ b/drivers/gpu/drm/i915/display/intel_display_wa.h @@ -21,4 +21,15 @@ static inline bool intel_display_needs_wa_16023588340(struct intel_display *disp bool intel_display_needs_wa_16023588340(struct intel_display *display); #endif +enum intel_display_wa { + INTEL_DISPLAY_WA_16023588340, + INTEL_DISPLAY_WA_16025573575, + INTEL_DISPLAY_WA_14011503117, +}; + +bool __intel_display_wa(struct intel_display *display, enum intel_display_wa wa, const char *name); + +#define intel_display_wa(__display, __wa) \ + __intel_display_wa((__display), INTEL_DISPLAY_WA_##__wa, __stringify(__wa)) + #endif diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 744f51c0eab8..77a0199f9ea5 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -1603,9 +1603,7 @@ DEFINE_SHOW_ATTRIBUTE(intel_dmc_debugfs_status); void intel_dmc_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; - - debugfs_create_file("i915_dmc_info", 0444, minor->debugfs_root, + debugfs_create_file("i915_dmc_info", 0444, display->drm->debugfs_root, display, &intel_dmc_debugfs_status_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 7976fec88606..2eab591a8ef5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -27,6 +27,7 @@ #include <linux/export.h> #include <linux/i2c.h> +#include <linux/iopoll.h> #include <linux/log2.h> #include <linux/math.h> #include <linux/notifier.h> @@ -174,7 +175,6 @@ int intel_dp_link_symbol_clock(int rate) static int max_dprx_rate(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int max_rate; if (intel_dp_tunnel_bw_alloc_is_enabled(intel_dp)) @@ -183,16 +183,13 @@ static int max_dprx_rate(struct intel_dp *intel_dp) max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); /* - * Some broken eDP sinks illegally declare support for - * HBR3 without TPS4, and are unable to produce a stable - * output. Reject HBR3 when TPS4 is not available. + * Some platforms + eDP panels may not reliably support HBR3 + * due to signal integrity limitations, despite advertising it. + * Cap the link rate to HBR2 to avoid unstable configurations for the + * known machines. 
*/ - if (max_rate >= 810000 && !drm_dp_tps4_supported(intel_dp->dpcd)) { - drm_dbg_kms(display->drm, - "[ENCODER:%d:%s] Rejecting HBR3 due to missing TPS4 support\n", - encoder->base.base.id, encoder->base.name); - max_rate = 540000; - } + if (intel_dp_is_edp(intel_dp) && intel_has_quirk(display, QUIRK_EDP_LIMIT_RATE_HBR2)) + max_rate = min(max_rate, 540000); return max_rate; } @@ -1418,6 +1415,7 @@ intel_dp_mode_valid(struct drm_connector *_connector, struct intel_display *display = to_intel_display(_connector->dev); struct intel_connector *connector = to_intel_connector(_connector); struct intel_dp *intel_dp = intel_attached_dp(connector); + enum intel_output_format sink_format, output_format; const struct drm_display_mode *fixed_mode; int target_clock = mode->clock; int max_rate, mode_rate, max_lanes, max_link_clock; @@ -1451,6 +1449,13 @@ intel_dp_mode_valid(struct drm_connector *_connector, mode->hdisplay, target_clock); max_dotclk *= num_joined_pipes; + sink_format = intel_dp_sink_format(connector, mode); + output_format = intel_dp_output_format(connector, sink_format); + + status = intel_pfit_mode_valid(display, mode, output_format, num_joined_pipes); + if (status != MODE_OK) + return status; + if (target_clock > max_dotclk) return MODE_CLOCK_HIGH; @@ -1466,11 +1471,8 @@ intel_dp_mode_valid(struct drm_connector *_connector, intel_dp_mode_min_output_bpp(connector, mode)); if (intel_dp_has_dsc(connector)) { - enum intel_output_format sink_format, output_format; int pipe_bpp; - sink_format = intel_dp_sink_format(connector, mode); - output_format = intel_dp_output_format(connector, sink_format); /* * TBD pass the connector BPC, * for now U8_MAX so that max BPC on that platform would be picked @@ -2535,13 +2537,15 @@ intel_dp_dsc_compute_pipe_bpp_limits(struct intel_dp *intel_dp, bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, - struct intel_connector *connector, + struct drm_connector_state *conn_state, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, struct link_config_limits *limits) { bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + struct intel_connector *connector = + to_intel_connector(conn_state->connector); limits->min_rate = intel_dp_min_link_rate(intel_dp); limits->max_rate = intel_dp_max_link_rate(intel_dp); @@ -2551,7 +2555,8 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp, limits->min_lane_count = intel_dp_min_lane_count(intel_dp); limits->max_lane_count = intel_dp_max_lane_count(intel_dp); - limits->pipe.min_bpp = intel_dp_min_bpp(crtc_state->output_format); + limits->pipe.min_bpp = intel_dp_in_hdr_mode(conn_state) ? 
30 : + intel_dp_min_bpp(crtc_state->output_format); if (is_mst) { /* * FIXME: If all the streams can't fit into the link with their @@ -2650,7 +2655,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, joiner_needs_dsc = intel_dp_joiner_needs_dsc(display, num_joined_pipes); dsc_needed = joiner_needs_dsc || intel_dp->force_dsc_en || - !intel_dp_compute_config_limits(intel_dp, connector, pipe_config, + !intel_dp_compute_config_limits(intel_dp, conn_state, pipe_config, respect_downstream_limits, false, &limits); @@ -2684,7 +2689,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, str_yes_no(ret), str_yes_no(joiner_needs_dsc), str_yes_no(intel_dp->force_dsc_en)); - if (!intel_dp_compute_config_limits(intel_dp, connector, pipe_config, + if (!intel_dp_compute_config_limits(intel_dp, conn_state, pipe_config, respect_downstream_limits, true, &limits)) @@ -2916,6 +2921,19 @@ static void intel_dp_compute_vsc_sdp(struct intel_dp *intel_dp, } } +bool +intel_dp_in_hdr_mode(const struct drm_connector_state *conn_state) +{ + struct hdr_output_metadata *hdr_metadata; + + if (!conn_state->hdr_output_metadata) + return false; + + hdr_metadata = conn_state->hdr_output_metadata->data; + + return hdr_metadata->hdmi_metadata_type1.eotf == HDMI_EOTF_SMPTE_ST2084; +} + static void intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, @@ -3181,7 +3199,26 @@ int intel_dp_compute_min_hblank(struct intel_crtc_state *crtc_state, */ min_hblank = min_hblank - 2; - min_hblank = min(10, min_hblank); + /* + * min_hblank formula is undergoing a change, to avoid underrun use the + * recomended value in spec to compare with the calculated one and use the + * minimum value + */ + if (intel_dp_is_uhbr(crtc_state)) { + /* + * Note: Bspec requires a min_hblank of 2 for YCBCR420 + * with compressed bpp 6, but the minimum compressed bpp + * supported by the driver is 8. + */ + drm_WARN_ON(display->drm, + (crtc_state->dsc.compression_enable && + crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 && + crtc_state->dsc.compressed_bpp_x16 < fxp_q4_from_int(8))); + min_hblank = min(3, min_hblank); + } else { + min_hblank = min(10, min_hblank); + } + crtc_state->min_hblank = min_hblank; return 0; @@ -3842,10 +3879,11 @@ static int intel_dp_pcon_start_frl_training(struct intel_dp *intel_dp) if (ret < 0) return ret; /* Wait for PCON to be FRL Ready */ - wait_for(is_active = drm_dp_pcon_is_frl_ready(&intel_dp->aux) == true, TIMEOUT_FRL_READY_MS); - - if (!is_active) - return -ETIMEDOUT; + ret = poll_timeout_us(is_active = drm_dp_pcon_is_frl_ready(&intel_dp->aux), + is_active, + 1000, TIMEOUT_FRL_READY_MS * 1000, false); + if (ret) + return ret; ret = drm_dp_pcon_frl_configure_1(&intel_dp->aux, max_frl_bw, DP_PCON_ENABLE_SEQUENTIAL_LINK); @@ -3862,12 +3900,11 @@ static int intel_dp_pcon_start_frl_training(struct intel_dp *intel_dp) * Wait for FRL to be completed * Check if the HDMI Link is up and active. 
*/ - wait_for(is_active = - intel_dp_pcon_is_frl_trained(intel_dp, max_frl_bw_mask, &frl_trained_mask), - TIMEOUT_HDMI_LINK_ACTIVE_MS); - - if (!is_active) - return -ETIMEDOUT; + ret = poll_timeout_us(is_active = intel_dp_pcon_is_frl_trained(intel_dp, max_frl_bw_mask, &frl_trained_mask), + is_active, + 1000, TIMEOUT_HDMI_LINK_ACTIVE_MS * 1000, false); + if (ret) + return ret; frl_trained: drm_dbg(display->drm, "FRL_TRAINED_MASK = %u\n", frl_trained_mask); @@ -4277,10 +4314,26 @@ static void intel_edp_mso_init(struct intel_dp *intel_dp) } static void +intel_edp_set_data_override_rates(struct intel_dp *intel_dp) +{ + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int *sink_rates = intel_dp->sink_rates; + int i, count = 0; + + for (i = 0; i < intel_dp->num_sink_rates; i++) { + if (intel_bios_encoder_reject_edp_rate(encoder->devdata, + intel_dp->sink_rates[i])) + continue; + + sink_rates[count++] = intel_dp->sink_rates[i]; + } + intel_dp->num_sink_rates = count; +} + +static void intel_edp_set_sink_rates(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; intel_dp->num_sink_rates = 0; @@ -4306,16 +4359,13 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) break; /* - * Some broken eDP sinks illegally declare support for - * HBR3 without TPS4, and are unable to produce a stable - * output. Reject HBR3 when TPS4 is not available. + * Some platforms cannot reliably drive HBR3 rates due to PHY limitations, + * even if the sink advertises support. Reject any sink rates above HBR2 on + * the known machines for stable output. */ - if (rate >= 810000 && !drm_dp_tps4_supported(intel_dp->dpcd)) { - drm_dbg_kms(display->drm, - "[ENCODER:%d:%s] Rejecting HBR3 due to missing TPS4 support\n", - encoder->base.base.id, encoder->base.name); + if (rate > 540000 && + intel_has_quirk(display, QUIRK_EDP_LIMIT_RATE_HBR2)) break; - } intel_dp->sink_rates[i] = rate; } @@ -4330,6 +4380,8 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) intel_dp->use_rate_select = true; else intel_dp_set_sink_rates(intel_dp); + + intel_edp_set_data_override_rates(intel_dp); } static bool @@ -5611,14 +5663,9 @@ bool intel_digital_port_connected_locked(struct intel_encoder *encoder) intel_wakeref_t wakeref; with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) { - unsigned long wait_expires = jiffies + msecs_to_jiffies_timeout(4); - - do { - is_connected = dig_port->connected(encoder); - if (is_connected || is_glitch_free) - break; - usleep_range(10, 30); - } while (time_before(jiffies, wait_expires)); + poll_timeout_us(is_connected = dig_port->connected(encoder), + is_connected || is_glitch_free, + 30, 4000, false); } return is_connected; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 0657f5681196..f90cfd1dbbd0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -193,7 +193,7 @@ void intel_dp_wait_source_oui(struct intel_dp *intel_dp); int intel_dp_output_bpp(enum intel_output_format output_format, int bpp); bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, - struct intel_connector *connector, + struct drm_connector_state *conn_state, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, @@ -214,5 +214,6 @@ int intel_dp_compute_min_hblank(struct intel_crtc_state *crtc_state, int intel_dp_dsc_bpp_step_x16(const struct intel_connector *connector); void 
intel_dp_dpcd_set_probe(struct intel_dp *intel_dp, bool force_on_external); +bool intel_dp_in_hdr_mode(const struct drm_connector_state *conn_state); #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 41228478b21c..12084a542fc5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -225,19 +225,6 @@ intel_dp_aux_hdr_set_aux_backlight(const struct drm_connector_state *conn_state, connector->base.base.id, connector->base.name); } -static bool -intel_dp_in_hdr_mode(const struct drm_connector_state *conn_state) -{ - struct hdr_output_metadata *hdr_metadata; - - if (!conn_state->hdr_output_metadata) - return false; - - hdr_metadata = conn_state->hdr_output_metadata->data; - - return hdr_metadata->hdmi_metadata_type1.eotf == HDMI_EOTF_SMPTE_ST2084; -} - static void intel_dp_aux_hdr_set_backlight(const struct drm_connector_state *conn_state, u32 level) { diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index a479b63112ea..27f3716bdc1f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -22,6 +22,7 @@ */ #include <linux/debugfs.h> +#include <linux/iopoll.h> #include <drm/display/drm_dp_helper.h> #include <drm/drm_print.h> @@ -478,12 +479,13 @@ static u8 intel_dp_get_lane_adjust_train(struct intel_dp *intel_dp, _TRAIN_REQ_TX_FFE_ARGS(link_status, 2), \ _TRAIN_REQ_TX_FFE_ARGS(link_status, 3) -void +bool intel_dp_get_adjust_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, enum drm_dp_phy dp_phy, const u8 link_status[DP_LINK_STATUS_SIZE]) { + bool changed = false; int lane; if (intel_dp_is_uhbr(crtc_state)) { @@ -502,10 +504,17 @@ intel_dp_get_adjust_train(struct intel_dp *intel_dp, TRAIN_REQ_PREEMPH_ARGS(link_status)); } - for (lane = 0; lane < 4; lane++) - intel_dp->train_set[lane] = - intel_dp_get_lane_adjust_train(intel_dp, crtc_state, - dp_phy, link_status, lane); + for (lane = 0; lane < 4; lane++) { + u8 new = intel_dp_get_lane_adjust_train(intel_dp, crtc_state, + dp_phy, link_status, lane); + if (intel_dp->train_set[lane] == new) + continue; + + intel_dp->train_set[lane] = new; + changed = true; + } + + return changed; } static int intel_dp_training_pattern_set_reg(struct intel_dp *intel_dp, @@ -758,6 +767,63 @@ void intel_dp_link_training_set_bw(struct intel_dp *intel_dp, } } +/* + * Pick Training Pattern Sequence (TPS) for channel equalization. 128b/132b TPS2 + * for UHBR+, TPS4 for HBR3 or for 1.4 devices that support it, TPS3 for HBR2 or + * 1.2 devices that support it, TPS2 otherwise. + */ +static u32 intel_dp_training_pattern(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + enum drm_dp_phy dp_phy) +{ + struct intel_display *display = to_intel_display(intel_dp); + bool source_tps3, sink_tps3, source_tps4, sink_tps4; + + /* UHBR+ use separate 128b/132b TPS2 */ + if (intel_dp_is_uhbr(crtc_state)) + return DP_TRAINING_PATTERN_2; + + /* + * TPS4 support is mandatory for all downstream devices that + * support HBR3. There are no known eDP panels that support + * TPS4 as of Feb 2018 as per VESA eDP_v1.4b_E1 specification. + * LTTPRs must support TPS4. 
+ */ + source_tps4 = intel_dp_source_supports_tps4(display); + sink_tps4 = dp_phy != DP_PHY_DPRX || + drm_dp_tps4_supported(intel_dp->dpcd); + if (source_tps4 && sink_tps4) { + return DP_TRAINING_PATTERN_4; + } else if (crtc_state->port_clock == 810000) { + if (!source_tps4) + lt_dbg(intel_dp, dp_phy, + "8.1 Gbps link rate without source TPS4 support\n"); + if (!sink_tps4) + lt_dbg(intel_dp, dp_phy, + "8.1 Gbps link rate without sink TPS4 support\n"); + } + + /* + * TPS3 support is mandatory for downstream devices that + * support HBR2. However, not all sinks follow the spec. + */ + source_tps3 = intel_dp_source_supports_tps3(display); + sink_tps3 = dp_phy != DP_PHY_DPRX || + drm_dp_tps3_supported(intel_dp->dpcd); + if (source_tps3 && sink_tps3) { + return DP_TRAINING_PATTERN_3; + } else if (crtc_state->port_clock >= 540000) { + if (!source_tps3) + lt_dbg(intel_dp, dp_phy, + ">=5.4/6.48 Gbps link rate without source TPS3 support\n"); + if (!sink_tps3) + lt_dbg(intel_dp, dp_phy, + ">=5.4/6.48 Gbps link rate without sink TPS3 support\n"); + } + + return DP_TRAINING_PATTERN_2; +} + static void intel_dp_update_link_bw_set(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 link_bw, u8 rate_select) @@ -950,63 +1016,6 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp, } /* - * Pick Training Pattern Sequence (TPS) for channel equalization. 128b/132b TPS2 - * for UHBR+, TPS4 for HBR3 or for 1.4 devices that support it, TPS3 for HBR2 or - * 1.2 devices that support it, TPS2 otherwise. - */ -static u32 intel_dp_training_pattern(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - enum drm_dp_phy dp_phy) -{ - struct intel_display *display = to_intel_display(intel_dp); - bool source_tps3, sink_tps3, source_tps4, sink_tps4; - - /* UHBR+ use separate 128b/132b TPS2 */ - if (intel_dp_is_uhbr(crtc_state)) - return DP_TRAINING_PATTERN_2; - - /* - * TPS4 support is mandatory for all downstream devices that - * support HBR3. There are no known eDP panels that support - * TPS4 as of Feb 2018 as per VESA eDP_v1.4b_E1 specification. - * LTTPRs must support TPS4. - */ - source_tps4 = intel_dp_source_supports_tps4(display); - sink_tps4 = dp_phy != DP_PHY_DPRX || - drm_dp_tps4_supported(intel_dp->dpcd); - if (source_tps4 && sink_tps4) { - return DP_TRAINING_PATTERN_4; - } else if (crtc_state->port_clock == 810000) { - if (!source_tps4) - lt_dbg(intel_dp, dp_phy, - "8.1 Gbps link rate without source TPS4 support\n"); - if (!sink_tps4) - lt_dbg(intel_dp, dp_phy, - "8.1 Gbps link rate without sink TPS4 support\n"); - } - - /* - * TPS3 support is mandatory for downstream devices that - * support HBR2. However, not all sinks follow the spec. - */ - source_tps3 = intel_dp_source_supports_tps3(display); - sink_tps3 = dp_phy != DP_PHY_DPRX || - drm_dp_tps3_supported(intel_dp->dpcd); - if (source_tps3 && sink_tps3) { - return DP_TRAINING_PATTERN_3; - } else if (crtc_state->port_clock >= 540000) { - if (!source_tps3) - lt_dbg(intel_dp, dp_phy, - ">=5.4/6.48 Gbps link rate without source TPS3 support\n"); - if (!sink_tps3) - lt_dbg(intel_dp, dp_phy, - ">=5.4/6.48 Gbps link rate without sink TPS3 support\n"); - } - - return DP_TRAINING_PATTERN_2; -} - -/* * Perform the link training channel equalization phase on the given DP PHY * using one of training pattern 2, 3 or 4 depending on the source and * sink capabilities. 
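/*
 * Editor's note -- illustrative sketch, not part of the patch above.
 * Several hunks in this series replace open-coded wait_for()/wait_for_us()
 * loops with poll_timeout_us() from <linux/iopoll.h>: re-run an operation,
 * test a condition, sleep between polls, and get 0 back on success or an
 * error code on timeout.  The register and bit names below are hypothetical,
 * chosen only to show the calling pattern used by the call sites in this diff.
 */
#include <linux/iopoll.h>

static int example_wait_for_ready(struct intel_display *display)
{
	u32 val;
	int ret;

	/* the first argument refreshes 'val'; poll every 100us, time out after 1ms */
	ret = poll_timeout_us(val = intel_de_read(display, EXAMPLE_CTL_REG),
			      val & EXAMPLE_READY_BIT,
			      100, 1000, false);
	if (ret)
		drm_err(display->drm, "timeout waiting for ready (0x%08x)\n", val);

	return ret;
}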
@@ -1127,16 +1136,19 @@ void intel_dp_stop_link_train(struct intel_dp *intel_dp, { struct intel_display *display = to_intel_display(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int ret; intel_dp->link.active = true; - intel_dp_disable_dpcd_training_pattern(intel_dp, DP_PHY_DPRX); intel_dp_program_link_training_pattern(intel_dp, crtc_state, DP_PHY_DPRX, DP_TRAINING_PATTERN_DISABLE); - if (intel_dp_is_uhbr(crtc_state) && - wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) { - lt_dbg(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clearing\n"); + if (intel_dp_is_uhbr(crtc_state)) { + ret = poll_timeout_us(ret = intel_dp_128b132b_intra_hop(intel_dp, crtc_state), + ret == 0, + 500, 500 * 1000, false); + if (ret) + lt_dbg(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clearing\n"); } intel_hpd_unblock(encoder); @@ -1371,8 +1383,8 @@ intel_dp_link_train_all_phys(struct intel_dp *intel_dp, if (ret) ret = intel_dp_link_train_phy(intel_dp, crtc_state, DP_PHY_DPRX); - if (intel_dp->set_idle_link_train) - intel_dp->set_idle_link_train(intel_dp, crtc_state); + intel_dp_disable_dpcd_training_pattern(intel_dp, DP_PHY_DPRX); + intel_dp->set_idle_link_train(intel_dp, crtc_state); return ret; } @@ -1574,8 +1586,12 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, int lttpr_count) { bool passed = false; + int ret; - if (wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) { + ret = poll_timeout_us(ret = intel_dp_128b132b_intra_hop(intel_dp, crtc_state), + ret == 0, + 500, 500 * 1000, false); + if (ret) { lt_err(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clear\n"); goto out; } @@ -1602,6 +1618,8 @@ out: intel_dp_program_link_training_pattern(intel_dp, crtc_state, DP_PHY_DPRX, DP_TRAINING_PATTERN_2); + intel_dp_disable_dpcd_training_pattern(intel_dp, DP_PHY_DPRX); + return passed; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.h b/drivers/gpu/drm/i915/display/intel_dp_link_training.h index 46614124569f..1ba22ed6db08 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.h +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.h @@ -23,7 +23,7 @@ void intel_dp_link_training_set_bw(struct intel_dp *intel_dp, int link_bw, int rate_select, int lane_count, bool enhanced_framing); -void intel_dp_get_adjust_train(struct intel_dp *intel_dp, +bool intel_dp_get_adjust_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, enum drm_dp_phy dp_phy, const u8 link_status[DP_LINK_STATUS_SIZE]); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 74497c9a0554..352f7ef29c28 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -611,12 +611,15 @@ adjust_limits_for_dsc_hblank_expansion_quirk(struct intel_dp *intel_dp, static bool mst_stream_compute_config_limits(struct intel_dp *intel_dp, - struct intel_connector *connector, + struct drm_connector_state *conn_state, struct intel_crtc_state *crtc_state, bool dsc, struct link_config_limits *limits) { - if (!intel_dp_compute_config_limits(intel_dp, connector, + struct intel_connector *connector = + to_intel_connector(conn_state->connector); + + if (!intel_dp_compute_config_limits(intel_dp, conn_state, crtc_state, false, dsc, limits)) return false; @@ -665,7 +668,7 @@ static int mst_stream_compute_config(struct intel_encoder *encoder, joiner_needs_dsc = intel_dp_joiner_needs_dsc(display, num_joined_pipes); dsc_needed = 
joiner_needs_dsc || intel_dp->force_dsc_en || - !mst_stream_compute_config_limits(intel_dp, connector, + !mst_stream_compute_config_limits(intel_dp, conn_state, pipe_config, false, &limits); if (!dsc_needed) { @@ -691,7 +694,7 @@ static int mst_stream_compute_config(struct intel_encoder *encoder, str_yes_no(intel_dp->force_dsc_en)); - if (!mst_stream_compute_config_limits(intel_dp, connector, + if (!mst_stream_compute_config_limits(intel_dp, conn_state, pipe_config, true, &limits)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/display/intel_dp_test.c b/drivers/gpu/drm/i915/display/intel_dp_test.c index 6ed5012c5fac..5cfa1dd411da 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_test.c +++ b/drivers/gpu/drm/i915/display/intel_dp_test.c @@ -6,7 +6,6 @@ #include <drm/display/drm_dp.h> #include <drm/display/drm_dp_helper.h> #include <drm/drm_edid.h> -#include <drm/drm_file.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> @@ -753,13 +752,12 @@ static const struct { void intel_dp_test_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; int i; for (i = 0; i < ARRAY_SIZE(intel_display_debugfs_files); i++) { debugfs_create_file(intel_display_debugfs_files[i].name, 0644, - minor->debugfs_root, + display->drm->debugfs_root, display, intel_display_debugfs_files[i].fops); } diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 33e0398120c8..8ea96cc524a1 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2046,6 +2046,7 @@ static void bxt_ddi_pll_enable(struct intel_display *display, enum dpio_phy phy = DPIO_PHY0; enum dpio_channel ch = DPIO_CH0; u32 temp; + int ret; bxt_port_to_phy_channel(display, port, &phy, &ch); @@ -2056,8 +2057,10 @@ static void bxt_ddi_pll_enable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), 0, PORT_PLL_POWER_ENABLE); - if (wait_for_us((intel_de_read(display, BXT_PORT_PLL_ENABLE(port)) & - PORT_PLL_POWER_STATE), 200)) + ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), + PORT_PLL_POWER_STATE, PORT_PLL_POWER_STATE, + 200, 0, NULL); + if (ret) drm_err(display->drm, "Power state not set for PLL:%d\n", port); } @@ -2119,8 +2122,10 @@ static void bxt_ddi_pll_enable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), 0, PORT_PLL_ENABLE); intel_de_posting_read(display, BXT_PORT_PLL_ENABLE(port)); - if (wait_for_us((intel_de_read(display, BXT_PORT_PLL_ENABLE(port)) & PORT_PLL_LOCK), - 200)) + ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), + PORT_PLL_LOCK, PORT_PLL_LOCK, + 200, 0, NULL); + if (ret) drm_err(display->drm, "PLL %d not locked\n", port); if (display->platform.geminilake) { @@ -2144,6 +2149,7 @@ static void bxt_ddi_pll_disable(struct intel_display *display, struct intel_dpll *pll) { enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ + int ret; intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), PORT_PLL_ENABLE, 0); intel_de_posting_read(display, BXT_PORT_PLL_ENABLE(port)); @@ -2152,8 +2158,10 @@ static void bxt_ddi_pll_disable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), PORT_PLL_POWER_ENABLE, 0); - if (wait_for_us(!(intel_de_read(display, BXT_PORT_PLL_ENABLE(port)) & - PORT_PLL_POWER_STATE), 200)) + ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), + PORT_PLL_POWER_STATE, 0, + 200, 0, NULL); + if (ret) drm_err(display->drm, "Power state 
not reset for PLL:%d\n", port); } diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index aea249e2699f..c0a817018d08 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -33,8 +33,6 @@ i915_vm_to_dpt(struct i915_address_space *vm) return container_of(vm, struct i915_dpt, vm); } -#define dpt_total_entries(dpt) ((dpt)->vm.total >> PAGE_SHIFT) - static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) { writeq(pte, addr); @@ -322,5 +320,5 @@ void intel_dpt_destroy(struct i915_address_space *vm) u64 intel_dpt_offset(struct i915_vma *dpt_vma) { - return dpt_vma->node.start; + return i915_vma_offset(dpt_vma); } diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 53d8ae3a70e9..dee44d45b668 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -4,10 +4,11 @@ * */ +#include <linux/iopoll.h> + #include <drm/drm_print.h> #include <drm/drm_vblank.h> -#include "i915_utils.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_regs.h" @@ -871,8 +872,13 @@ void intel_dsb_wait(struct intel_dsb *dsb) struct intel_crtc *crtc = dsb->crtc; struct intel_display *display = to_intel_display(crtc->base.dev); enum pipe pipe = crtc->pipe; + bool is_busy; + int ret; - if (wait_for(!is_dsb_busy(display, pipe, dsb->id), 1)) { + ret = poll_timeout_us(is_busy = is_dsb_busy(display, pipe, dsb->id), + !is_busy, + 100, 1000, false); + if (ret) { u32 offset = intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf); intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id), diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index e6a851d276f8..23402408e172 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -777,7 +777,7 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) intel_dsi->init_count = mipi_config->master_init_timer; intel_dsi->bw_timer = mipi_config->dbi_bw_timer; intel_dsi->video_frmt_cfg_bits = - mipi_config->bta_enabled ? DISABLE_VIDEO_BTA : 0; + mipi_config->bta_disable ? DISABLE_VIDEO_BTA : 0; intel_dsi->bgr_enabled = mipi_config->rgb_flip; /* Starting point, adjusted depending on dual link and burst mode */ diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h new file mode 100644 index 000000000000..edc7331dcca2 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __INTEL_DSI_VBT_DEFS_H__ +#define __INTEL_DSI_VBT_DEFS_H__ + +#include <linux/types.h> + +/* + * MIPI Sequence Block definitions + * + * Note the VBT spec has AssertReset / DeassertReset swapped from their + * usual naming, we use the proper names here to avoid confusion when + * reading the code. 
+ */ +enum mipi_seq { + MIPI_SEQ_END = 0, + MIPI_SEQ_DEASSERT_RESET, /* Spec says MipiAssertResetPin */ + MIPI_SEQ_INIT_OTP, + MIPI_SEQ_DISPLAY_ON, + MIPI_SEQ_DISPLAY_OFF, + MIPI_SEQ_ASSERT_RESET, /* Spec says MipiDeassertResetPin */ + MIPI_SEQ_BACKLIGHT_ON, /* sequence block v2+ */ + MIPI_SEQ_BACKLIGHT_OFF, /* sequence block v2+ */ + MIPI_SEQ_TEAR_ON, /* sequence block v2+ */ + MIPI_SEQ_TEAR_OFF, /* sequence block v3+ */ + MIPI_SEQ_POWER_ON, /* sequence block v3+ */ + MIPI_SEQ_POWER_OFF, /* sequence block v3+ */ + MIPI_SEQ_MAX +}; + +enum mipi_seq_element { + MIPI_SEQ_ELEM_END = 0, + MIPI_SEQ_ELEM_SEND_PKT, + MIPI_SEQ_ELEM_DELAY, + MIPI_SEQ_ELEM_GPIO, + MIPI_SEQ_ELEM_I2C, /* sequence block v2+ */ + MIPI_SEQ_ELEM_SPI, /* sequence block v3+ */ + MIPI_SEQ_ELEM_PMIC, /* sequence block v3+ */ + MIPI_SEQ_ELEM_MAX +}; + +#define MIPI_DSI_UNDEFINED_PANEL_ID 0 +#define MIPI_DSI_GENERIC_PANEL_ID 1 + +struct mipi_config { + u16 panel_id; + + /* General Params */ + struct { + u32 enable_dithering:1; + u32 rsvd1:1; + u32 is_bridge:1; + + u32 panel_arch_type:2; + u32 is_cmd_mode:1; + +#define NON_BURST_SYNC_PULSE 0x1 +#define NON_BURST_SYNC_EVENTS 0x2 +#define BURST_MODE 0x3 + u32 video_transfer_mode:2; + + u32 cabc_supported:1; +#define PPS_BLC_PMIC 0 +#define PPS_BLC_SOC 1 + u32 pwm_blc:1; + +#define PIXEL_FORMAT_RGB565 0x1 +#define PIXEL_FORMAT_RGB666 0x2 +#define PIXEL_FORMAT_RGB666_LOOSELY_PACKED 0x3 +#define PIXEL_FORMAT_RGB888 0x4 + u32 videomode_color_format:4; + +#define ENABLE_ROTATION_0 0x0 +#define ENABLE_ROTATION_90 0x1 +#define ENABLE_ROTATION_180 0x2 +#define ENABLE_ROTATION_270 0x3 + u32 rotation:2; + u32 bta_disable:1; + u32 rsvd2:15; + } __packed; + + /* Port Desc */ + struct { +#define DUAL_LINK_NOT_SUPPORTED 0 +#define DUAL_LINK_FRONT_BACK 1 +#define DUAL_LINK_PIXEL_ALT 2 + u16 dual_link:2; + u16 lane_cnt:2; + u16 pixel_overlap:3; + u16 rgb_flip:1; +#define DL_DCS_PORT_A 0x00 +#define DL_DCS_PORT_C 0x01 +#define DL_DCS_PORT_A_AND_C 0x02 + u16 dl_dcs_cabc_ports:2; + u16 dl_dcs_backlight_ports:2; + u16 port_sync:1; /* 219-230 */ + u16 rsvd3:3; + } __packed; + + /* DSI Controller Parameters */ + struct { + u16 dsi_usage:1; + u16 rsvd4:15; + } __packed; + + u8 rsvd5; + u32 target_burst_mode_freq; + u32 dsi_ddr_clk; + u32 bridge_ref_clk; + + /* LP Byte Clock */ + struct { +#define BYTE_CLK_SEL_20MHZ 0 +#define BYTE_CLK_SEL_10MHZ 1 +#define BYTE_CLK_SEL_5MHZ 2 + u8 byte_clk_sel:2; + u8 rsvd6:6; + } __packed; + + /* DPhy Flags */ + struct { + u16 dphy_param_valid:1; + u16 eot_pkt_disabled:1; + u16 enable_clk_stop:1; + u16 blanking_packets_during_bllp:1; /* 219+ */ + u16 lp_clock_during_lpm:1; /* 219+ */ + u16 rsvd7:11; + } __packed; + + u32 hs_tx_timeout; + u32 lp_rx_timeout; + u32 turn_around_timeout; + u32 device_reset_timer; + u32 master_init_timer; + u32 dbi_bw_timer; + u32 lp_byte_clk_val; + + /* DPhy Params */ + struct { + u32 prepare_cnt:6; + u32 rsvd8:2; + u32 clk_zero_cnt:8; + u32 trail_cnt:5; + u32 rsvd9:3; + u32 exit_zero_cnt:6; + u32 rsvd10:2; + } __packed; + + u32 clk_lane_switch_cnt; + u32 hl_switch_cnt; + + u32 rsvd11[6]; + + /* timings based on dphy spec */ + u8 tclk_miss; + u8 tclk_post; + u8 rsvd12; + u8 tclk_pre; + u8 tclk_prepare; + u8 tclk_settle; + u8 tclk_term_enable; + u8 tclk_trail; + u16 tclk_prepare_clkzero; + u8 rsvd13; + u8 td_term_enable; + u8 teot; + u8 ths_exit; + u8 ths_prepare; + u16 ths_prepare_hszero; + u8 rsvd14; + u8 ths_settle; + u8 ths_skip; + u8 ths_trail; + u8 tinit; + u8 tlpx; + u8 rsvd15[3]; + + /* GPIOs */ + u8 panel_enable; + u8 bl_enable; + u8 
pwm_enable; + u8 reset_r_n; + u8 pwr_down_r; + u8 stdby_r_n; +} __packed; + +/* all delays have a unit of 100us */ +struct mipi_pps_data { + u16 panel_on_delay; + u16 bl_enable_delay; + u16 bl_disable_delay; + u16 panel_off_delay; + u16 panel_power_cycle_delay; +} __packed; + +#endif /* __INTEL_DSI_VBT_DEFS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_encoder.c b/drivers/gpu/drm/i915/display/intel_encoder.c index 0b7bd26f4339..2ffe1f251ef8 100644 --- a/drivers/gpu/drm/i915/display/intel_encoder.c +++ b/drivers/gpu/drm/i915/display/intel_encoder.c @@ -8,6 +8,7 @@ #include "intel_display_core.h" #include "intel_display_types.h" #include "intel_encoder.h" +#include "intel_hotplug.h" static void intel_encoder_link_check_work_fn(struct work_struct *work) { @@ -37,6 +38,28 @@ void intel_encoder_link_check_queue_work(struct intel_encoder *encoder, int dela &encoder->link_check_work, msecs_to_jiffies(delay_ms)); } +void intel_encoder_unblock_all_hpds(struct intel_display *display) +{ + struct intel_encoder *encoder; + + if (!HAS_DISPLAY(display)) + return; + + for_each_intel_encoder(display->drm, encoder) + intel_hpd_unblock(encoder); +} + +void intel_encoder_block_all_hpds(struct intel_display *display) +{ + struct intel_encoder *encoder; + + if (!HAS_DISPLAY(display)) + return; + + for_each_intel_encoder(display->drm, encoder) + intel_hpd_block(encoder); +} + void intel_encoder_suspend_all(struct intel_display *display) { struct intel_encoder *encoder; @@ -80,3 +103,21 @@ void intel_encoder_shutdown_all(struct intel_display *display) if (encoder->shutdown_complete) encoder->shutdown_complete(encoder); } + +struct intel_digital_port *intel_dig_port_alloc(void) +{ + struct intel_digital_port *dig_port; + + dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); + if (!dig_port) + return NULL; + + dig_port->hdmi.hdmi_reg = INVALID_MMIO_REG; + dig_port->dp.output_reg = INVALID_MMIO_REG; + dig_port->aux_ch = AUX_CH_NONE; + dig_port->max_lanes = 4; + + mutex_init(&dig_port->hdcp.mutex); + + return dig_port; +} diff --git a/drivers/gpu/drm/i915/display/intel_encoder.h b/drivers/gpu/drm/i915/display/intel_encoder.h index 3fa5589f0b1c..ace0fe1a8f27 100644 --- a/drivers/gpu/drm/i915/display/intel_encoder.h +++ b/drivers/gpu/drm/i915/display/intel_encoder.h @@ -6,6 +6,7 @@ #ifndef __INTEL_ENCODER_H__ #define __INTEL_ENCODER_H__ +struct intel_digital_port; struct intel_display; struct intel_encoder; @@ -17,4 +18,9 @@ void intel_encoder_link_check_flush_work(struct intel_encoder *encoder); void intel_encoder_suspend_all(struct intel_display *display); void intel_encoder_shutdown_all(struct intel_display *display); +void intel_encoder_block_all_hpds(struct intel_display *display); +void intel_encoder_unblock_all_hpds(struct intel_display *display); + +struct intel_digital_port *intel_dig_port_alloc(void); + #endif /* __INTEL_ENCODER_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 0da842bd2f2f..b210c3250501 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -11,6 +11,7 @@ #include <drm/drm_modeset_helper.h> #include "i915_drv.h" +#include "i915_utils.h" #include "intel_bo.h" #include "intel_display.h" #include "intel_display_core.h" diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 5a0151775a3a..45af04cb0fb2 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -11,6 +11,7 @@ 
#include "gem/i915_gem_object.h" #include "i915_drv.h" +#include "i915_vma.h" #include "intel_display_core.h" #include "intel_display_rpm.h" #include "intel_display_types.h" @@ -151,7 +152,7 @@ intel_fb_pin_to_ggtt(const struct drm_framebuffer *fb, * happy to scanout from anywhere within its global aperture. */ pinctl = 0; - if (HAS_GMCH(dev_priv)) + if (HAS_GMCH(display)) pinctl |= PIN_MAPPABLE; i915_gem_ww_ctx_init(&ww, true); @@ -192,7 +193,7 @@ retry: * mode that matches the user configuration. */ ret = i915_vma_pin_fence(vma); - if (ret != 0 && DISPLAY_VER(dev_priv) < 4) { + if (ret != 0 && DISPLAY_VER(display) < 4) { i915_vma_unpin(vma); goto err_unpin; } @@ -260,6 +261,7 @@ intel_plane_fb_vtd_guard(const struct intel_plane_state *plane_state) int intel_plane_pin_fb(struct intel_plane_state *plane_state, const struct intel_plane_state *old_plane_state) { + struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); const struct intel_framebuffer *fb = to_intel_framebuffer(plane_state->hw.fb); @@ -277,17 +279,6 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state, plane_state->ggtt_vma = vma; - /* - * Pre-populate the dma address before we enter the vblank - * evade critical section as i915_gem_object_get_dma_address() - * will trigger might_sleep() even if it won't actually sleep, - * which is the case when the fb has already been pinned. - */ - if (intel_plane_needs_physical(plane)) { - struct drm_i915_gem_object *obj = to_intel_bo(intel_fb_bo(&fb->base)); - - plane_state->phys_dma_addr = i915_gem_object_get_dma_address(obj, 0); - } } else { unsigned int alignment = intel_plane_fb_min_alignment(plane_state); @@ -309,6 +300,28 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state, plane_state->dpt_vma = vma; WARN_ON(plane_state->ggtt_vma == plane_state->dpt_vma); + + /* + * The DPT object contains only one vma, and there is no VT-d + * guard, so the VMA's offset within the DPT is always 0. + */ + drm_WARN_ON(display->drm, intel_dpt_offset(plane_state->dpt_vma)); + } + + /* + * Pre-populate the dma address before we enter the vblank + * evade critical section as i915_gem_object_get_dma_address() + * will trigger might_sleep() even if it won't actually sleep, + * which is the case when the fb has already been pinned. 
+ */ + if (intel_plane_needs_physical(plane)) { + struct drm_i915_gem_object *obj = to_intel_bo(intel_fb_bo(&fb->base)); + + plane_state->surf = i915_gem_object_get_dma_address(obj, 0) + + plane->surf_offset(plane_state); + } else { + plane_state->surf = i915_ggtt_offset(plane_state->ggtt_vma) + + plane->surf_offset(plane_state); } return 0; diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 685ac98bd001..d4c5deff9cbe 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1460,7 +1460,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (intel_display_needs_wa_16023588340(display)) { + if (intel_display_wa(display, 16023588340)) { plane_state->no_fbc_reason = "Wa_16023588340"; return 0; } @@ -2240,10 +2240,9 @@ void intel_fbc_crtc_debugfs_add(struct intel_crtc *crtc) /* FIXME: remove this once igt is on board with per-crtc stuff */ void intel_fbc_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; struct intel_fbc *fbc; fbc = display->fbc[INTEL_FBC_A]; if (fbc) - intel_fbc_debugfs_add(fbc, minor->debugfs_root); + intel_fbc_debugfs_add(fbc, display->drm->debugfs_root); } diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index 8039a84671cc..59a36b3a22c1 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -292,34 +292,6 @@ int intel_fdi_link_freq(struct intel_display *display, return display->fdi.pll_freq; } -/** - * intel_fdi_compute_pipe_bpp - compute pipe bpp limited by max link bpp - * @crtc_state: the crtc state - * - * Compute the pipe bpp limited by the CRTC's maximum link bpp. Encoders can - * call this function during state computation in the simple case where the - * link bpp will always match the pipe bpp. This is the case for all non-DP - * encoders, while DP encoders will use a link bpp lower than pipe bpp in case - * of DSC compression. - * - * Returns %true in case of success, %false if pipe bpp would need to be - * reduced below its valid range. 
- */ -bool intel_fdi_compute_pipe_bpp(struct intel_crtc_state *crtc_state) -{ - int pipe_bpp = min(crtc_state->pipe_bpp, - fxp_q4_to_int(crtc_state->max_link_bpp_x16)); - - pipe_bpp = rounddown(pipe_bpp, 2 * 3); - - if (pipe_bpp < 6 * 3) - return false; - - crtc_state->pipe_bpp = pipe_bpp; - - return true; -} - int ilk_fdi_compute_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { diff --git a/drivers/gpu/drm/i915/display/intel_fdi.h b/drivers/gpu/drm/i915/display/intel_fdi.h index ad5e103c38a8..1cd08df9b0c2 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.h +++ b/drivers/gpu/drm/i915/display/intel_fdi.h @@ -20,7 +20,6 @@ struct intel_link_bw_limits; int intel_fdi_add_affected_crtcs(struct intel_atomic_state *state); int intel_fdi_link_freq(struct intel_display *display, const struct intel_crtc_state *pipe_config); -bool intel_fdi_compute_pipe_bpp(struct intel_crtc_state *crtc_state); int ilk_fdi_compute_config(struct intel_crtc *intel_crtc, struct intel_crtc_state *pipe_config); int intel_fdi_atomic_check_link(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index 000a898c9480..30eff6009e87 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -13,6 +13,36 @@ #include "intel_display_types.h" #include "intel_global_state.h" +#define for_each_new_global_obj_in_state(__state, obj, new_obj_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->num_global_objs && \ + ((obj) = (__state)->global_objs[__i].ptr, \ + (new_obj_state) = (__state)->global_objs[__i].new_state, 1); \ + (__i)++) \ + for_each_if(obj) + +#define for_each_old_global_obj_in_state(__state, obj, old_obj_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->num_global_objs && \ + ((obj) = (__state)->global_objs[__i].ptr, \ + (old_obj_state) = (__state)->global_objs[__i].old_state, 1); \ + (__i)++) \ + for_each_if(obj) + +#define for_each_oldnew_global_obj_in_state(__state, obj, old_obj_state, new_obj_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->num_global_objs && \ + ((obj) = (__state)->global_objs[__i].ptr, \ + (old_obj_state) = (__state)->global_objs[__i].old_state, \ + (new_obj_state) = (__state)->global_objs[__i].new_state, 1); \ + (__i)++) \ + for_each_if(obj) + +struct intel_global_objs_state { + struct intel_global_obj *ptr; + struct intel_global_state *state, *old_state, *new_state; +}; + struct intel_global_commit { struct kref ref; struct completion done; @@ -148,7 +178,7 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state, struct intel_display *display = to_intel_display(state); int index, num_objs, i; size_t size; - struct __intel_global_objs_state *arr; + struct intel_global_objs_state *arr; struct intel_global_state *obj_state; for (i = 0; i < state->num_global_objs; i++) diff --git a/drivers/gpu/drm/i915/display/intel_global_state.h b/drivers/gpu/drm/i915/display/intel_global_state.h index d42fb2547ee9..e1efa530cc86 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.h +++ b/drivers/gpu/drm/i915/display/intel_global_state.h @@ -11,6 +11,7 @@ struct intel_atomic_state; struct intel_display; +struct intel_global_commit; struct intel_global_obj; struct intel_global_state; @@ -26,36 +27,6 @@ struct intel_global_obj { const struct intel_global_state_funcs *funcs; }; -#define intel_for_each_global_obj(obj, dev_priv) \ - list_for_each_entry(obj, &(dev_priv)->display.global.obj_list, head) - 
-#define for_each_new_global_obj_in_state(__state, obj, new_obj_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->num_global_objs && \ - ((obj) = (__state)->global_objs[__i].ptr, \ - (new_obj_state) = (__state)->global_objs[__i].new_state, 1); \ - (__i)++) \ - for_each_if(obj) - -#define for_each_old_global_obj_in_state(__state, obj, old_obj_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->num_global_objs && \ - ((obj) = (__state)->global_objs[__i].ptr, \ - (old_obj_state) = (__state)->global_objs[__i].old_state, 1); \ - (__i)++) \ - for_each_if(obj) - -#define for_each_oldnew_global_obj_in_state(__state, obj, old_obj_state, new_obj_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->num_global_objs && \ - ((obj) = (__state)->global_objs[__i].ptr, \ - (old_obj_state) = (__state)->global_objs[__i].old_state, \ - (new_obj_state) = (__state)->global_objs[__i].new_state, 1); \ - (__i)++) \ - for_each_if(obj) - -struct intel_global_commit; - struct intel_global_state { struct intel_global_obj *obj; struct intel_atomic_state *state; @@ -64,11 +35,6 @@ struct intel_global_state { bool changed, serialized; }; -struct __intel_global_objs_state { - struct intel_global_obj *ptr; - struct intel_global_state *state, *old_state, *new_state; -}; - void intel_atomic_global_obj_init(struct intel_display *display, struct intel_global_obj *obj, struct intel_global_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 0d73f32fe7f1..358210adb8f8 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -30,6 +30,7 @@ #include <linux/export.h> #include <linux/i2c-algo-bit.h> #include <linux/i2c.h> +#include <linux/iopoll.h> #include <drm/display/drm_hdcp_helper.h> @@ -39,6 +40,7 @@ #include "intel_de.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_wa.h" #include "intel_gmbus.h" #include "intel_gmbus_regs.h" @@ -217,7 +219,7 @@ static void pnv_gmbus_clock_gating(struct intel_display *display, bool enable) { /* When using bit bashing for I2C, this bit needs to be set to 1 */ - intel_de_rmw(display, DSPCLK_GATE_D(display), + intel_de_rmw(display, DSPCLK_GATE_D, PNV_GMBUSUNIT_CLOCK_GATE_DISABLE, !enable ? PNV_GMBUSUNIT_CLOCK_GATE_DISABLE : 0); } @@ -240,14 +242,20 @@ static void bxt_gmbus_clock_gating(struct intel_display *display, static u32 get_reserved(struct intel_gmbus *bus) { struct intel_display *display = bus->display; - u32 reserved = 0; + u32 preserve_bits = 0; + + if (display->platform.i830 || display->platform.i845g) + return 0; /* On most chips, these bits must be preserved in software. 
*/
- if (!display->platform.i830 && !display->platform.i845g)
- reserved = intel_de_read_notrace(display, bus->gpio_reg) &
- (GPIO_DATA_PULLUP_DISABLE | GPIO_CLOCK_PULLUP_DISABLE);
+ preserve_bits |= GPIO_DATA_PULLUP_DISABLE | GPIO_CLOCK_PULLUP_DISABLE;
+
+ /* Wa_16025573575: the mask bits need to be preserved throughout */
+ if (intel_display_wa(display, 16025573575))
+ preserve_bits |= GPIO_CLOCK_DIR_MASK | GPIO_CLOCK_VAL_MASK |
+ GPIO_DATA_DIR_MASK | GPIO_DATA_VAL_MASK;
- return reserved;
+ return intel_de_read_notrace(display, bus->gpio_reg) & preserve_bits;
}
static int get_clock(void *data)
@@ -308,6 +316,22 @@ static void set_data(void *data, int state_high)
intel_de_posting_read(display, bus->gpio_reg);
}
+static void
+ptl_handle_mask_bits(struct intel_gmbus *bus, bool set)
+{
+ struct intel_display *display = bus->display;
+ u32 reg_val = intel_de_read_notrace(display, bus->gpio_reg);
+ u32 mask_bits = GPIO_CLOCK_DIR_MASK | GPIO_CLOCK_VAL_MASK |
+ GPIO_DATA_DIR_MASK | GPIO_DATA_VAL_MASK;
+ if (set)
+ reg_val |= mask_bits;
+ else
+ reg_val &= ~mask_bits;
+
+ intel_de_write_notrace(display, bus->gpio_reg, reg_val);
+ intel_de_posting_read(display, bus->gpio_reg);
+}
+
static int
intel_gpio_pre_xfer(struct i2c_adapter *adapter)
{
@@ -319,6 +343,9 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter)
if (display->platform.pineview)
pnv_gmbus_clock_gating(display, false);
+ if (intel_display_wa(display, 16025573575))
+ ptl_handle_mask_bits(bus, true);
+
set_data(bus, 1);
set_clock(bus, 1);
udelay(I2C_RISEFALL_TIME);
@@ -336,6 +363,9 @@ intel_gpio_post_xfer(struct i2c_adapter *adapter)
if (display->platform.pineview)
pnv_gmbus_clock_gating(display, true);
+
+ if (intel_display_wa(display, 16025573575))
+ ptl_handle_mask_bits(bus, false);
}
static void
@@ -385,11 +415,14 @@ static int gmbus_wait(struct intel_display *display, u32 status, u32 irq_en)
intel_de_write_fw(display, GMBUS4(display), irq_en);
status |= GMBUS_SATOER;
- ret = wait_for_us((gmbus2 = intel_de_read_fw(display, GMBUS2(display))) & status,
- 2);
+
+ ret = poll_timeout_us_atomic(gmbus2 = intel_de_read_fw(display, GMBUS2(display)),
+ gmbus2 & status,
+ 0, 2, false);
if (ret)
- ret = wait_for((gmbus2 = intel_de_read_fw(display, GMBUS2(display))) & status,
- 50);
+ ret = poll_timeout_us(gmbus2 = intel_de_read_fw(display, GMBUS2(display)),
+ gmbus2 & status,
+ 500, 50 * 1000, false);
intel_de_write_fw(display, GMBUS4(display), 0);
remove_wait_queue(&display->gmbus.wait_queue, &wait);
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index 42202c8bb066..531ee122bf82 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -11,6 +11,7 @@
#include <linux/component.h>
#include <linux/debugfs.h>
#include <linux/i2c.h>
+#include <linux/iopoll.h>
#include <linux/random.h>
#include <drm/display/drm_hdcp_helper.h>
@@ -326,16 +327,13 @@ static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *dig_port,
bool ksv_ready;
/* Poll for ksv list ready (spec says max time allowed is 5s) */
- ret = __wait_for(read_ret = shim->read_ksv_ready(dig_port,
- &ksv_ready),
- read_ret || ksv_ready, 5 * 1000 * 1000, 1000,
- 100 * 1000);
+ ret = poll_timeout_us(read_ret = shim->read_ksv_ready(dig_port, &ksv_ready),
+ read_ret || ksv_ready,
+ 100 * 1000, 5 * 1000 * 1000, false);
if (ret)
return ret;
if (read_ret)
return read_ret;
- if (!ksv_ready)
- return -ETIMEDOUT;
return 0;
}
@@ -817,6 +815,7 @@ static int intel_hdcp_auth(struct intel_connector
*connector) enum port port = dig_port->base.port; unsigned long r0_prime_gen_start; int ret, i, tries = 2; + u32 val; union { u32 reg[2]; u8 shim[DRM_HDCP_AN_LEN]; @@ -905,8 +904,10 @@ static int intel_hdcp_auth(struct intel_connector *connector) HDCP_CONF_AUTH_AND_ENC); /* Wait for R0 ready */ - if (wait_for(intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)) & - (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), 1)) { + ret = poll_timeout_us(val = intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)), + val & (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), + 100, 1000, false); + if (ret) { drm_err(display->drm, "Timed out waiting for R0 ready\n"); return -ETIMEDOUT; } @@ -938,16 +939,16 @@ static int intel_hdcp_auth(struct intel_connector *connector) ri.reg); /* Wait for Ri prime match */ - if (!wait_for(intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)) & - (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) + ret = poll_timeout_us(val = intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)), + val & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), + 100, 1000, false); + if (!ret) break; } if (i == tries) { drm_dbg_kms(display->drm, - "Timed out waiting for Ri prime match (%x)\n", - intel_de_read(display, - HDCP_STATUS(display, cpu_transcoder, port))); + "Timed out waiting for Ri prime match (%x)\n", val); return -ETIMEDOUT; } @@ -2446,12 +2447,6 @@ static int _intel_hdcp_enable(struct intel_atomic_state *state, if (!hdcp->shim) return -ENOENT; - if (!connector->encoder) { - drm_err(display->drm, "[CONNECTOR:%d:%s] encoder is not initialized\n", - connector->base.base.id, connector->base.name); - return -ENODEV; - } - mutex_lock(&hdcp->mutex); mutex_lock(&dig_port->hdcp.mutex); drm_WARN_ON(display->drm, diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 9961ff259298..4ab7e2e3bfd4 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -29,6 +29,7 @@ #include <linux/delay.h> #include <linux/hdmi.h> #include <linux/i2c.h> +#include <linux/iopoll.h> #include <linux/slab.h> #include <linux/string_helpers.h> @@ -60,6 +61,7 @@ #include "intel_hdcp_regs.h" #include "intel_hdcp_shim.h" #include "intel_hdmi.h" +#include "intel_link_bw.h" #include "intel_lspcon.h" #include "intel_panel.h" #include "intel_pfit.h" @@ -1582,9 +1584,9 @@ bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port, intel_de_write(display, HDCP_RPRIME(display, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (wait_for((intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)) & - (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC)) == - (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { + ret = intel_de_wait_for_set(display, HDCP_STATUS(display, cpu_transcoder, port), + HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC, 1); + if (ret) { drm_dbg_kms(display->drm, "Ri' mismatch detected (%x)\n", intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port))); @@ -1689,11 +1691,10 @@ intel_hdmi_hdcp2_wait_for_msg(struct intel_digital_port *dig_port, if (timeout < 0) return timeout; - ret = __wait_for(ret = hdcp2_detect_msg_availability(dig_port, - msg_id, &msg_ready, - &msg_sz), - !ret && msg_ready && msg_sz, timeout * 1000, - 1000, 5 * 1000); + ret = poll_timeout_us(ret = hdcp2_detect_msg_availability(dig_port, msg_id, + &msg_ready, &msg_sz), + !ret && msg_ready && msg_sz, + 4000, timeout * 1000, false); if (ret) drm_dbg_kms(display->drm, "msg_id: %d, ret: %d, timeout: 
%d\n", @@ -2053,6 +2054,10 @@ intel_hdmi_mode_valid(struct drm_connector *_connector, else sink_format = INTEL_OUTPUT_FORMAT_RGB; + status = intel_pfit_mode_valid(display, mode, sink_format, 0); + if (status != MODE_OK) + return status; + status = intel_hdmi_mode_clock_valid(&connector->base, clock, has_hdmi_sink, sink_format); if (status != MODE_OK) { if (ycbcr_420_only || @@ -2341,6 +2346,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) pipe_config->pixel_multiplier = 2; + if (!intel_link_bw_compute_pipe_bpp(pipe_config)) + return -EINVAL; + pipe_config->has_audio = intel_hdmi_has_audio(encoder, pipe_config, conn_state) && intel_audio_compute_config(encoder, pipe_config, conn_state); diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 265aa97fcc75..4451a792600a 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -28,6 +28,7 @@ #include "i915_drv.h" #include "i915_irq.h" +#include "i915_utils.h" #include "intel_connector.h" #include "intel_display_power.h" #include "intel_display_core.h" @@ -971,8 +972,6 @@ void intel_hpd_cancel_work(struct intel_display *display) spin_lock_irq(&display->irq.lock); - drm_WARN_ON(display->drm, get_blocked_hpd_pin_mask(display)); - display->hotplug.long_hpd_pin_mask = 0; display->hotplug.short_hpd_pin_mask = 0; display->hotplug.event_bits = 0; @@ -1333,12 +1332,12 @@ static const struct file_operations i915_hpd_short_storm_ctl_fops = { void intel_hpd_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; - debugfs_create_file("i915_hpd_storm_ctl", 0644, minor->debugfs_root, + debugfs_create_file("i915_hpd_storm_ctl", 0644, debugfs_root, display, &i915_hpd_storm_ctl_fops); - debugfs_create_file("i915_hpd_short_storm_ctl", 0644, minor->debugfs_root, + debugfs_create_file("i915_hpd_short_storm_ctl", 0644, debugfs_root, display, &i915_hpd_short_storm_ctl_fops); - debugfs_create_bool("i915_ignore_long_hpd", 0644, minor->debugfs_root, + debugfs_create_bool("i915_ignore_long_hpd", 0644, debugfs_root, &display->hotplug.ignore_long_hpd); } diff --git a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c index 43aee70597bf..4f72f3fb9af5 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c @@ -1025,7 +1025,7 @@ static void mtp_tc_hpd_enable_detection(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - intel_de_rmw(display, SHOTPLUG_CTL_DDI, + intel_de_rmw(display, SHOTPLUG_CTL_TC, mtp_tc_hotplug_mask(encoder->hpd_pin), mtp_tc_hotplug_enables(encoder)); } diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c index 3caef7f9c7c4..f52dee0ea412 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.c +++ b/drivers/gpu/drm/i915/display/intel_link_bw.c @@ -165,6 +165,34 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, } /** + * intel_link_bw_compute_pipe_bpp - compute pipe bpp limited by max link bpp + * @crtc_state: the crtc state + * + * Compute the pipe bpp limited by the CRTC's maximum link bpp. Encoders can + * call this function during state computation in the simple case where the + * link bpp will always match the pipe bpp. 
This is the case for all non-DP + * encoders, while DP encoders will use a link bpp lower than pipe bpp in case + * of DSC compression. + * + * Returns %true in case of success, %false if pipe bpp would need to be + * reduced below its valid range. + */ +bool intel_link_bw_compute_pipe_bpp(struct intel_crtc_state *crtc_state) +{ + int pipe_bpp = min(crtc_state->pipe_bpp, + fxp_q4_to_int(crtc_state->max_link_bpp_x16)); + + pipe_bpp = rounddown(pipe_bpp, 2 * 3); + + if (pipe_bpp < 6 * 3) + return false; + + crtc_state->pipe_bpp = pipe_bpp; + + return true; +} + +/** * intel_link_bw_set_bpp_limit_for_pipe - set link bpp limit for a pipe to its minimum * @state: atomic state * @old_limits: link BW limits @@ -449,6 +477,7 @@ void intel_link_bw_connector_debugfs_add(struct intel_connector *connector) switch (connector->base.connector_type) { case DRM_MODE_CONNECTOR_DisplayPort: case DRM_MODE_CONNECTOR_eDP: + case DRM_MODE_CONNECTOR_HDMIA: break; case DRM_MODE_CONNECTOR_VGA: case DRM_MODE_CONNECTOR_SVIDEO: @@ -458,11 +487,6 @@ void intel_link_bw_connector_debugfs_add(struct intel_connector *connector) break; return; - case DRM_MODE_CONNECTOR_HDMIA: - if (HAS_FDI(display) && !HAS_DDI(display)) - break; - - return; default: return; } diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.h b/drivers/gpu/drm/i915/display/intel_link_bw.h index b499042e62b1..95ab7c50c61d 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.h +++ b/drivers/gpu/drm/i915/display/intel_link_bw.h @@ -27,6 +27,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, struct intel_link_bw_limits *limits, u8 pipe_mask, const char *reason); +bool intel_link_bw_compute_pipe_bpp(struct intel_crtc_state *crtc_state); bool intel_link_bw_set_bpp_limit_for_pipe(struct intel_atomic_state *state, const struct intel_link_bw_limits *old_limits, struct intel_link_bw_limits *new_limits, diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index 666148a14522..42284e9928f2 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -68,9 +68,9 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/drm_print.h> #include <drm/intel/intel_lpe_audio.h> -#include "i915_drv.h" #include "i915_irq.h" #include "intel_audio_regs.h" #include "intel_de.h" @@ -170,14 +170,11 @@ static struct irq_chip lpe_audio_irqchip = { static int lpe_audio_irq_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int irq = display->audio.lpe.irq; - drm_WARN_ON(display->drm, !intel_irqs_enabled(dev_priv)); - irq_set_chip_and_handler_name(irq, - &lpe_audio_irqchip, - handle_simple_irq, - "hdmi_lpe_audio_irq_handler"); + irq_set_chip_and_handler_name(irq, &lpe_audio_irqchip, + handle_simple_irq, + "hdmi_lpe_audio_irq_handler"); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index abc4b562083d..d56026c4efdd 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -23,6 +23,8 @@ * */ +#include <linux/iopoll.h> + #include <drm/display/drm_dp_dual_mode_helper.h> #include <drm/display/drm_hdmi_helper.h> #include <drm/drm_atomic_helper.h> @@ -181,6 +183,8 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); struct intel_display *display = to_intel_display(intel_dp); enum 
drm_lspcon_mode current_mode; + int timeout_us; + int ret; current_mode = lspcon_get_current_mode(lspcon); if (current_mode == mode) @@ -189,9 +193,12 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, drm_dbg_kms(display->drm, "Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); - wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, - lspcon_get_mode_settle_timeout(lspcon)); - if (current_mode != mode) + timeout_us = lspcon_get_mode_settle_timeout(lspcon) * 1000; + + ret = poll_timeout_us(current_mode = lspcon_get_current_mode(lspcon), + current_mode == mode, + 5000, timeout_us, false); + if (ret) drm_err(display->drm, "LSPCON mode hasn't settled\n"); out: diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 7e48a235c99f..48f4d8ed4f15 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -48,6 +48,7 @@ #include "intel_dpll.h" #include "intel_fdi.h" #include "intel_gmbus.h" +#include "intel_link_bw.h" #include "intel_lvds.h" #include "intel_lvds_regs.h" #include "intel_panel.h" @@ -433,7 +434,7 @@ static int intel_lvds_compute_config(struct intel_encoder *encoder, if (HAS_PCH_SPLIT(display)) { crtc_state->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(crtc_state)) + if (!intel_link_bw_compute_pipe_bpp(crtc_state)) return -EINVAL; } diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 81efdb17fc0c..cbc220310813 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -28,13 +28,13 @@ #include <linux/acpi.h> #include <linux/debugfs.h> #include <linux/dmi.h> +#include <linux/iopoll.h> #include <acpi/video.h> #include <drm/drm_edid.h> #include <drm/drm_file.h> #include <drm/drm_print.h> -#include "i915_utils.h" #include "intel_acpi.h" #include "intel_backlight.h" #include "intel_display_core.h" @@ -357,10 +357,12 @@ static int swsci(struct intel_display *display, pci_write_config_word(pdev, SWSCI, swsci_val); /* Poll for the result. 
*/ -#define C (((scic = swsci->scic) & SWSCI_SCIC_INDICATOR) == 0) - if (wait_for(C, dslp)) { + ret = poll_timeout_us(scic = swsci->scic, + (scic & SWSCI_SCIC_INDICATOR) == 0, + 1000, dslp * 1000, false); + if (ret) { drm_dbg(display->drm, "SWSCI request timed out\n"); - return -ETIMEDOUT; + return ret; } scic = (scic & SWSCI_SCIC_EXIT_STATUS_MASK) >> @@ -1299,8 +1301,6 @@ DEFINE_SHOW_ATTRIBUTE(intel_opregion); void intel_opregion_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; - - debugfs_create_file("i915_opregion", 0444, minor->debugfs_root, + debugfs_create_file("i915_opregion", 0444, display->drm->debugfs_root, display, &intel_opregion_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 159a5f998ea0..272f9e7af4d4 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -217,10 +217,9 @@ static void i830_overlay_clock_gating(struct intel_display *display, /* WA_OVERLAY_CLKGATE:alm */ if (enable) - intel_de_write(display, DSPCLK_GATE_D(display), 0); + intel_de_write(display, DSPCLK_GATE_D, 0); else - intel_de_write(display, DSPCLK_GATE_D(display), - OVRUNIT_CLOCK_GATE_DISABLE); + intel_de_write(display, DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); /* WA_DISABLE_L2CACHE_CLOCK_GATING:alm */ pci_bus_read_config_byte(pdev->bus, diff --git a/drivers/gpu/drm/i915/display/intel_pch.h b/drivers/gpu/drm/i915/display/intel_pch.h index cf4dab1b98bf..19cac7412d0a 100644 --- a/drivers/gpu/drm/i915/display/intel_pch.h +++ b/drivers/gpu/drm/i915/display/intel_pch.h @@ -6,8 +6,6 @@ #ifndef __INTEL_PCH__ #define __INTEL_PCH__ -#include "intel_display_conversion.h" - struct intel_display; /* @@ -36,7 +34,7 @@ enum intel_pch { PCH_LNL, }; -#define INTEL_PCH_TYPE(_display) (__to_intel_display(_display)->pch_type) +#define INTEL_PCH_TYPE(_display) ((_display)->pch_type) #define HAS_PCH_DG2(display) (INTEL_PCH_TYPE(display) == PCH_DG2) #define HAS_PCH_ADP(display) (INTEL_PCH_TYPE(display) == PCH_ADP) #define HAS_PCH_DG1(display) (INTEL_PCH_TYPE(display) == PCH_DG1) diff --git a/drivers/gpu/drm/i915/display/intel_pch_refclk.c b/drivers/gpu/drm/i915/display/intel_pch_refclk.c index d3c5255bf1a8..9ae53679a041 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_refclk.c +++ b/drivers/gpu/drm/i915/display/intel_pch_refclk.c @@ -17,16 +17,22 @@ static void lpt_fdi_reset_mphy(struct intel_display *display) { + int ret; + intel_de_rmw(display, SOUTH_CHICKEN2, 0, FDI_MPHY_IOSFSB_RESET_CTL); - if (wait_for_us(intel_de_read(display, SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS, 100)) + ret = intel_de_wait_custom(display, SOUTH_CHICKEN2, + FDI_MPHY_IOSFSB_RESET_STATUS, FDI_MPHY_IOSFSB_RESET_STATUS, + 100, 0, NULL); + if (ret) drm_err(display->drm, "FDI mPHY reset assert timeout\n"); intel_de_rmw(display, SOUTH_CHICKEN2, FDI_MPHY_IOSFSB_RESET_CTL, 0); - if (wait_for_us((intel_de_read(display, SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) + ret = intel_de_wait_custom(display, SOUTH_CHICKEN2, + FDI_MPHY_IOSFSB_RESET_STATUS, 0, + 100, 0, NULL); + if (ret) drm_err(display->drm, "FDI mPHY reset de-assert timeout\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_pfit.c b/drivers/gpu/drm/i915/display/intel_pfit.c index 13541be4d6df..68539e7c2a24 100644 --- a/drivers/gpu/drm/i915/display/intel_pfit.c +++ b/drivers/gpu/drm/i915/display/intel_pfit.c @@ -14,6 +14,7 @@ #include "intel_lvds_regs.h" #include "intel_pfit.h" #include 
"intel_pfit_regs.h" +#include "skl_scaler.h" static int intel_pch_pfit_check_dst_window(const struct intel_crtc_state *crtc_state) { @@ -546,6 +547,16 @@ out: return intel_gmch_pfit_check_timings(crtc_state); } +enum drm_mode_status +intel_pfit_mode_valid(struct intel_display *display, + const struct drm_display_mode *mode, + enum intel_output_format output_format, + int num_joined_pipes) +{ + return skl_scaler_mode_valid(display, mode, output_format, + num_joined_pipes); +} + int intel_pfit_compute_config(struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { diff --git a/drivers/gpu/drm/i915/display/intel_pfit.h b/drivers/gpu/drm/i915/display/intel_pfit.h index ef34f9b49d09..c1bb0d1f344e 100644 --- a/drivers/gpu/drm/i915/display/intel_pfit.h +++ b/drivers/gpu/drm/i915/display/intel_pfit.h @@ -6,8 +6,12 @@ #ifndef __INTEL_PFIT_H__ #define __INTEL_PFIT_H__ +enum drm_mode_status; +struct drm_display_mode; struct drm_connector_state; struct intel_crtc_state; +struct intel_display; +enum intel_output_format; int intel_pfit_compute_config(struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); @@ -17,5 +21,9 @@ void ilk_pfit_get_config(struct intel_crtc_state *crtc_state); void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state); void i9xx_pfit_disable(const struct intel_crtc_state *old_crtc_state); void i9xx_pfit_get_config(struct intel_crtc_state *crtc_state); - +enum drm_mode_status +intel_pfit_mode_valid(struct intel_display *display, + const struct drm_display_mode *mode, + enum intel_output_format output_format, + int num_joined_pipes); #endif /* __INTEL_PFIT_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 36fb07471deb..81f05ee9a21a 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -46,7 +46,6 @@ #include "gem/i915_gem_object.h" #include "i915_scheduler_types.h" -#include "i915_vma.h" #include "i9xx_plane_regs.h" #include "intel_bo.h" #include "intel_cdclk.h" @@ -1749,8 +1748,3 @@ int intel_plane_atomic_check(struct intel_atomic_state *state) return 0; } - -u32 intel_plane_ggtt_offset(const struct intel_plane_state *plane_state) -{ - return i915_ggtt_offset(plane_state->ggtt_vma); -} diff --git a/drivers/gpu/drm/i915/display/intel_plane.h b/drivers/gpu/drm/i915/display/intel_plane.h index 4ef012c08fa4..8af41ccc0a69 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.h +++ b/drivers/gpu/drm/i915/display/intel_plane.h @@ -87,7 +87,6 @@ int intel_plane_add_affected(struct intel_atomic_state *state, struct intel_crtc *crtc); int intel_plane_atomic_check(struct intel_atomic_state *state); -u32 intel_plane_ggtt_offset(const struct intel_plane_state *plane_state); bool intel_plane_format_mod_supported_async(struct drm_plane *plane, u32 format, u64 modifier); diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 4246173ed311..a9f36b1b50c1 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -360,6 +360,8 @@ valid_fb: i915_vma_pin_fence(vma) == 0 && vma->fence) plane_state->flags |= PLANE_HAS_FENCE; + plane_state->surf = i915_ggtt_offset(plane_state->ggtt_vma); + plane_state->uapi.src_x = 0; plane_state->uapi.src_y = 0; plane_state->uapi.src_w = fb->width << 16; diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 
b64d0b30f5b1..327e0de86f1e 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -4,6 +4,7 @@ */ #include <linux/debugfs.h> +#include <linux/iopoll.h> #include <drm/drm_print.h> @@ -608,6 +609,8 @@ static void wait_panel_status(struct intel_dp *intel_dp, struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); i915_reg_t pp_stat_reg, pp_ctrl_reg; + int ret; + u32 val; lockdep_assert_held(&display->pps.mutex); @@ -624,13 +627,18 @@ static void wait_panel_status(struct intel_dp *intel_dp, intel_de_read(display, pp_stat_reg), intel_de_read(display, pp_ctrl_reg)); - if (intel_de_wait(display, pp_stat_reg, mask, value, 5000)) + ret = poll_timeout_us(val = intel_de_read(display, pp_stat_reg), + (val & mask) == value, + 10 * 1000, 5000 * 1000, true); + if (ret) { drm_err(display->drm, "[ENCODER:%d:%s] %s panel status timeout: PP_STATUS: 0x%08x PP_CONTROL: 0x%08x\n", dig_port->base.base.base.id, dig_port->base.base.name, pps_name(intel_dp), intel_de_read(display, pp_stat_reg), intel_de_read(display, pp_ctrl_reg)); + return; + } drm_dbg_kms(display->drm, "Wait complete\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 41988e193a41..22433fe2ee14 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -233,16 +233,12 @@ bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder, static bool psr_global_enabled(struct intel_dp *intel_dp) { - struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; switch (intel_dp->psr.debug & I915_PSR_DEBUG_MODE_MASK) { case I915_PSR_DEBUG_DEFAULT: - if (display->params.enable_psr == -1) - return intel_dp_is_edp(intel_dp) ? - connector->panel.vbt.psr.enable : - true; - return display->params.enable_psr; + return intel_dp_is_edp(intel_dp) ? 
+ connector->panel.vbt.psr.enable : true; case I915_PSR_DEBUG_DISABLE: return false; default: @@ -250,39 +246,23 @@ static bool psr_global_enabled(struct intel_dp *intel_dp) } } -static bool psr2_global_enabled(struct intel_dp *intel_dp) +static bool sel_update_global_enabled(struct intel_dp *intel_dp) { - struct intel_display *display = to_intel_display(intel_dp); - switch (intel_dp->psr.debug & I915_PSR_DEBUG_MODE_MASK) { case I915_PSR_DEBUG_DISABLE: case I915_PSR_DEBUG_FORCE_PSR1: return false; default: - if (display->params.enable_psr == 1) - return false; return true; } } -static bool psr2_su_region_et_global_enabled(struct intel_dp *intel_dp) -{ - struct intel_display *display = to_intel_display(intel_dp); - - if (display->params.enable_psr != -1) - return false; - - return true; -} - static bool panel_replay_global_enabled(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - if ((display->params.enable_psr != -1) || - (intel_dp->psr.debug & I915_PSR_DEBUG_PANEL_REPLAY_DISABLE)) - return false; - return true; + return !(intel_dp->psr.debug & I915_PSR_DEBUG_PANEL_REPLAY_DISABLE) && + display->params.enable_panel_replay; } static u32 psr_irq_psr_error_bit_get(struct intel_dp *intel_dp) @@ -600,6 +580,16 @@ exit: static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); + int ret; + + ret = drm_dp_dpcd_read_data(&intel_dp->aux, DP_PANEL_REPLAY_CAP_SUPPORT, + &intel_dp->pr_dpcd, sizeof(intel_dp->pr_dpcd)); + if (ret < 0) + return; + + if (!(intel_dp->pr_dpcd[INTEL_PR_DPCD_INDEX(DP_PANEL_REPLAY_CAP_SUPPORT)] & + DP_PANEL_REPLAY_SUPPORT)) + return; if (intel_dp_is_edp(intel_dp)) { if (!intel_alpm_aux_less_wake_supported(intel_dp)) { @@ -631,6 +621,15 @@ static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) static void _psr_init_dpcd(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); + int ret; + + ret = drm_dp_dpcd_read_data(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, + sizeof(intel_dp->psr_dpcd)); + if (ret < 0) + return; + + if (!intel_dp->psr_dpcd[0]) + return; drm_dbg_kms(display->drm, "eDP panel supports PSR version %x\n", intel_dp->psr_dpcd[0]); @@ -676,18 +675,9 @@ static void _psr_init_dpcd(struct intel_dp *intel_dp) void intel_psr_init_dpcd(struct intel_dp *intel_dp) { - drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, - sizeof(intel_dp->psr_dpcd)); - - drm_dp_dpcd_read(&intel_dp->aux, DP_PANEL_REPLAY_CAP_SUPPORT, - &intel_dp->pr_dpcd, sizeof(intel_dp->pr_dpcd)); - - if (intel_dp->pr_dpcd[INTEL_PR_DPCD_INDEX(DP_PANEL_REPLAY_CAP_SUPPORT)] & - DP_PANEL_REPLAY_SUPPORT) - _panel_replay_init_dpcd(intel_dp); + _psr_init_dpcd(intel_dp); - if (intel_dp->psr_dpcd[0]) - _psr_init_dpcd(intel_dp); + _panel_replay_init_dpcd(intel_dp); if (intel_dp->psr.sink_psr2_support || intel_dp->psr.sink_panel_replay_su_support) @@ -742,8 +732,7 @@ static bool psr2_su_region_et_valid(struct intel_dp *intel_dp, bool panel_replay return panel_replay ? 
intel_dp->pr_dpcd[INTEL_PR_DPCD_INDEX(DP_PANEL_REPLAY_CAP_SUPPORT)] & DP_PANEL_REPLAY_EARLY_TRANSPORT_SUPPORT : - intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_ET_SUPPORTED && - psr2_su_region_et_global_enabled(intel_dp); + intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_ET_SUPPORTED; } static void _panel_replay_enable_sink(struct intel_dp *intel_dp, @@ -936,7 +925,7 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) /* Wa_16025596647 */ if ((DISPLAY_VER(display) == 20 || IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && - is_dc5_dc6_blocked(intel_dp)) + is_dc5_dc6_blocked(intel_dp) && intel_dp->psr.pkg_c_latency_used) intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(display, intel_dp->psr.pipe, true); @@ -1026,7 +1015,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) /* Wa_16025596647 */ if ((DISPLAY_VER(display) == 20 || IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && - is_dc5_dc6_blocked(intel_dp)) + is_dc5_dc6_blocked(intel_dp) && intel_dp->psr.pkg_c_latency_used) idle_frames = 0; else idle_frames = psr_compute_idle_frames(intel_dp); @@ -1423,7 +1412,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, int crtc_vdisplay = crtc_state->hw.adjusted_mode.crtc_vdisplay; int psr_max_h = 0, psr_max_v = 0, max_bpp = 0; - if (!intel_dp->psr.sink_psr2_support) + if (!intel_dp->psr.sink_psr2_support || display->params.enable_psr == 1) return false; /* JSL and EHL only supports eDP 1.3 */ @@ -1528,7 +1517,7 @@ static bool intel_sel_update_config_valid(struct intel_dp *intel_dp, goto unsupported; } - if (!psr2_global_enabled(intel_dp)) { + if (!sel_update_global_enabled(intel_dp)) { drm_dbg_kms(display->drm, "Selective update disabled by flag\n"); goto unsupported; @@ -1576,7 +1565,7 @@ static bool _psr_compute_config(struct intel_dp *intel_dp, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int entry_setup_frames; - if (!CAN_PSR(intel_dp)) + if (!CAN_PSR(intel_dp) || !display->params.enable_psr) return false; /* @@ -1808,6 +1797,8 @@ static void intel_psr_activate(struct intel_dp *intel_dp) drm_WARN_ON(display->drm, intel_dp->psr.active); + drm_WARN_ON(display->drm, !intel_dp->psr.enabled); + lockdep_assert_held(&intel_dp->psr.lock); /* psr1, psr2 and panel-replay are mutually exclusive.*/ @@ -2027,6 +2018,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp->psr.req_psr2_sdp_prior_scanline = crtc_state->req_psr2_sdp_prior_scanline; intel_dp->psr.active_non_psr_pipes = crtc_state->active_non_psr_pipes; + intel_dp->psr.pkg_c_latency_used = crtc_state->pkg_c_latency_used; if (!psr_interrupt_error_check(intel_dp)) return; @@ -2103,8 +2095,9 @@ static void intel_psr_exit(struct intel_dp *intel_dp) drm_WARN_ON(display->drm, !(val & EDP_PSR2_ENABLE)); } else { - if (DISPLAY_VER(display) == 20 || - IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) + if ((DISPLAY_VER(display) == 20 || + IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && + intel_dp->psr.pkg_c_latency_used) intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(display, intel_dp->psr.pipe, false); @@ -2207,6 +2200,7 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) intel_dp->psr.su_region_et_enabled = false; intel_dp->psr.psr2_sel_fetch_cff_enabled = false; intel_dp->psr.active_non_psr_pipes = 0; + intel_dp->psr.pkg_c_latency_used = 0; } /** @@ -3099,7 +3093,7 @@ static bool __psr_wait_for_idle_locked(struct intel_dp *intel_dp) /* After the unlocked wait, verify that PSR is still wanted! 
*/ mutex_lock(&intel_dp->psr.lock); - return err == 0 && intel_dp->psr.enabled; + return err == 0 && intel_dp->psr.enabled && !intel_dp->psr.pause_counter; } static int intel_psr_fastset_force(struct intel_display *display) @@ -3228,8 +3222,13 @@ static void intel_psr_work(struct work_struct *work) if (!intel_dp->psr.enabled) goto unlock; - if (READ_ONCE(intel_dp->psr.irq_aux_error)) + if (READ_ONCE(intel_dp->psr.irq_aux_error)) { intel_psr_handle_irq(intel_dp); + goto unlock; + } + + if (intel_dp->psr.pause_counter) + goto unlock; /* * We have to make sure PSR is ready for re-enable @@ -3723,7 +3722,7 @@ static void intel_psr_apply_underrun_on_idle_wa_locked(struct intel_dp *intel_dp struct intel_display *display = to_intel_display(intel_dp); bool dc5_dc6_blocked; - if (!intel_dp->psr.active) + if (!intel_dp->psr.active || !intel_dp->psr.pkg_c_latency_used) return; dc5_dc6_blocked = is_dc5_dc6_blocked(intel_dp); @@ -3748,7 +3747,8 @@ static void psr_dc5_dc6_wa_work(struct work_struct *work) mutex_lock(&intel_dp->psr.lock); - if (intel_dp->psr.enabled && !intel_dp->psr.panel_replay_enabled) + if (intel_dp->psr.enabled && !intel_dp->psr.panel_replay_enabled && + !intel_dp->psr.pkg_c_latency_used) intel_psr_apply_underrun_on_idle_wa_locked(intel_dp); mutex_unlock(&intel_dp->psr.lock); @@ -3826,7 +3826,8 @@ void intel_psr_notify_pipe_change(struct intel_atomic_state *state, goto unlock; if ((enable && intel_dp->psr.active_non_psr_pipes) || - (!enable && !intel_dp->psr.active_non_psr_pipes)) { + (!enable && !intel_dp->psr.active_non_psr_pipes) || + !intel_dp->psr.pkg_c_latency_used) { intel_dp->psr.active_non_psr_pipes = active_non_psr_pipes; goto unlock; } @@ -3861,7 +3862,7 @@ void intel_psr_notify_vblank_enable_disable(struct intel_display *display, break; } - if (intel_dp->psr.enabled) + if (intel_dp->psr.enabled && intel_dp->psr.pkg_c_latency_used) intel_psr_apply_underrun_on_idle_wa_locked(intel_dp); mutex_unlock(&intel_dp->psr.lock); @@ -4157,12 +4158,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops, void intel_psr_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; - debugfs_create_file("i915_edp_psr_debug", 0644, minor->debugfs_root, + debugfs_create_file("i915_edp_psr_debug", 0644, debugfs_root, display, &i915_edp_psr_debug_fops); - debugfs_create_file("i915_edp_psr_status", 0444, minor->debugfs_root, + debugfs_create_file("i915_edp_psr_status", 0444, debugfs_root, display, &i915_edp_psr_status_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index a32fae510ed2..d2e16b79d6be 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -80,6 +80,12 @@ static void quirk_fw_sync_len(struct intel_dp *intel_dp) drm_info(display->drm, "Applying Fast Wake sync pulse count quirk\n"); } +static void quirk_edp_limit_rate_hbr2(struct intel_display *display) +{ + intel_set_quirk(display, QUIRK_EDP_LIMIT_RATE_HBR2); + drm_info(display->drm, "Applying eDP Limit rate to HBR2 quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -231,6 +237,9 @@ static struct intel_quirk intel_quirks[] = { { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, /* HP Notebook - 14-r206nv */ { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, + + /* Dell XPS 13 7390 2-in-1 */ + { 0x8a12, 0x1028, 0x08b0, quirk_edp_limit_rate_hbr2 }, }; static const struct 
intel_dpcd_quirk intel_dpcd_quirks[] = { diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h index cafdebda7535..06da0e286c67 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.h +++ b/drivers/gpu/drm/i915/display/intel_quirks.h @@ -20,6 +20,7 @@ enum intel_quirk_id { QUIRK_LVDS_SSC_DISABLE, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK, QUIRK_FW_SYNC_LEN, + QUIRK_EDP_LIMIT_RATE_HBR2, }; void intel_init_quirks(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 87aff2754f69..6c032d81e7ee 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -47,11 +47,11 @@ #include "intel_display_driver.h" #include "intel_display_regs.h" #include "intel_display_types.h" -#include "intel_fdi.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" #include "intel_hdmi.h" #include "intel_hotplug.h" +#include "intel_link_bw.h" #include "intel_panel.h" #include "intel_sdvo.h" #include "intel_sdvo_regs.h" @@ -1367,7 +1367,7 @@ static int intel_sdvo_compute_config(struct intel_encoder *encoder, if (HAS_PCH_SPLIT(display)) { pipe_config->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(pipe_config)) + if (!intel_link_bw_compute_pipe_bpp(pipe_config)) return -EINVAL; } @@ -2052,8 +2052,10 @@ static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder) { struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, - &intel_sdvo->hotplug_active, 2); + if (!intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, + &intel_sdvo->hotplug_active, 2)) + drm_warn(intel_sdvo->base.base.dev, + "Failed to enable hotplug on SDVO encoder\n"); } static enum intel_hotplug_state diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index e6844df837af..75bbaa923204 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -264,8 +264,7 @@ static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) return sprctl; } -static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 vlv_sprite_ctl(const struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->hw.fb; unsigned int rotation = plane_state->hw.rotation; @@ -395,15 +394,12 @@ vlv_sprite_update_arm(struct intel_dsb *dsb, enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 sprsurf_offset = plane_state->view.color_plane[0].offset; u32 x = plane_state->view.color_plane[0].x; u32 y = plane_state->view.color_plane[0].y; - u32 sprctl, linear_offset; + u32 sprctl; sprctl = plane_state->ctl | vlv_sprite_ctl_crtc(crtc_state); - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (display->platform.cherryview && pipe == PIPE_B) chv_sprite_update_csc(plane_state); @@ -418,7 +414,8 @@ vlv_sprite_update_arm(struct intel_dsb *dsb, intel_de_write_fw(display, SPCONSTALPHA(pipe, plane_id), 0); - intel_de_write_fw(display, SPLINOFF(pipe, plane_id), linear_offset); + intel_de_write_fw(display, SPLINOFF(pipe, plane_id), + intel_fb_xy_to_linear(x, y, plane_state, 0)); intel_de_write_fw(display, SPTILEOFF(pipe, plane_id), SP_OFFSET_Y(y) | SP_OFFSET_X(x)); @@ -428,8 +425,7 @@ vlv_sprite_update_arm(struct intel_dsb *dsb, * the control 
register just before the surface register. */ intel_de_write_fw(display, SPCNTR(pipe, plane_id), sprctl); - intel_de_write_fw(display, SPSURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + sprsurf_offset); + intel_de_write_fw(display, SPSURF(pipe, plane_id), plane_state->surf); vlv_sprite_update_clrc(plane_state); vlv_sprite_update_gamma(plane_state); @@ -663,8 +659,7 @@ static bool ivb_need_sprite_gamma(const struct intel_plane_state *plane_state) (display->platform.ivybridge || display->platform.haswell); } -static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 ivb_sprite_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -830,15 +825,12 @@ ivb_sprite_update_arm(struct intel_dsb *dsb, struct intel_display *display = to_intel_display(plane); enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 sprsurf_offset = plane_state->view.color_plane[0].offset; u32 x = plane_state->view.color_plane[0].x; u32 y = plane_state->view.color_plane[0].y; - u32 sprctl, linear_offset; + u32 sprctl; sprctl = plane_state->ctl | ivb_sprite_ctl_crtc(crtc_state); - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (key->flags) { intel_de_write_fw(display, SPRKEYVAL(pipe), key->min_value); intel_de_write_fw(display, SPRKEYMSK(pipe), @@ -852,7 +844,8 @@ ivb_sprite_update_arm(struct intel_dsb *dsb, intel_de_write_fw(display, SPROFFSET(pipe), SPRITE_OFFSET_Y(y) | SPRITE_OFFSET_X(x)); } else { - intel_de_write_fw(display, SPRLINOFF(pipe), linear_offset); + intel_de_write_fw(display, SPRLINOFF(pipe), + intel_fb_xy_to_linear(x, y, plane_state, 0)); intel_de_write_fw(display, SPRTILEOFF(pipe), SPRITE_OFFSET_Y(y) | SPRITE_OFFSET_X(x)); } @@ -863,8 +856,7 @@ ivb_sprite_update_arm(struct intel_dsb *dsb, * the control register just before the surface register. 
*/ intel_de_write_fw(display, SPRCTL(pipe), sprctl); - intel_de_write_fw(display, SPRSURF(pipe), - intel_plane_ggtt_offset(plane_state) + sprsurf_offset); + intel_de_write_fw(display, SPRSURF(pipe), plane_state->surf); ivb_sprite_update_gamma(plane_state); } @@ -1016,8 +1008,7 @@ static u32 g4x_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) return dvscntr; } -static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 g4x_sprite_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1181,15 +1172,12 @@ g4x_sprite_update_arm(struct intel_dsb *dsb, struct intel_display *display = to_intel_display(plane); enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 dvssurf_offset = plane_state->view.color_plane[0].offset; u32 x = plane_state->view.color_plane[0].x; u32 y = plane_state->view.color_plane[0].y; - u32 dvscntr, linear_offset; + u32 dvscntr; dvscntr = plane_state->ctl | g4x_sprite_ctl_crtc(crtc_state); - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (key->flags) { intel_de_write_fw(display, DVSKEYVAL(pipe), key->min_value); intel_de_write_fw(display, DVSKEYMSK(pipe), @@ -1197,7 +1185,8 @@ g4x_sprite_update_arm(struct intel_dsb *dsb, intel_de_write_fw(display, DVSKEYMAX(pipe), key->max_value); } - intel_de_write_fw(display, DVSLINOFF(pipe), linear_offset); + intel_de_write_fw(display, DVSLINOFF(pipe), + intel_fb_xy_to_linear(x, y, plane_state, 0)); intel_de_write_fw(display, DVSTILEOFF(pipe), DVS_OFFSET_Y(y) | DVS_OFFSET_X(x)); @@ -1207,8 +1196,7 @@ g4x_sprite_update_arm(struct intel_dsb *dsb, * the control register just before the surface register. 
*/ intel_de_write_fw(display, DVSCNTR(pipe), dvscntr); - intel_de_write_fw(display, DVSSURF(pipe), - intel_plane_ggtt_offset(plane_state) + dvssurf_offset); + intel_de_write_fw(display, DVSSURF(pipe), plane_state->surf); if (display->platform.g4x) g4x_sprite_update_gamma(plane_state); @@ -1387,9 +1375,9 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, return ret; if (DISPLAY_VER(display) >= 7) - plane_state->ctl = ivb_sprite_ctl(crtc_state, plane_state); + plane_state->ctl = ivb_sprite_ctl(plane_state); else - plane_state->ctl = g4x_sprite_ctl(crtc_state, plane_state); + plane_state->ctl = g4x_sprite_ctl(plane_state); return 0; } @@ -1439,7 +1427,7 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - plane_state->ctl = vlv_sprite_ctl(crtc_state, plane_state); + plane_state->ctl = vlv_sprite_ctl(plane_state); return 0; } @@ -1624,6 +1612,7 @@ intel_sprite_plane_create(struct intel_display *display, plane->capture_error = vlv_sprite_capture_error; plane->get_hw_state = vlv_sprite_get_hw_state; plane->check_plane = vlv_sprite_check; + plane->surf_offset = i965_plane_surf_offset; plane->max_stride = i965_plane_max_stride; plane->min_alignment = vlv_plane_min_alignment; plane->min_cdclk = vlv_plane_min_cdclk; @@ -1648,6 +1637,7 @@ intel_sprite_plane_create(struct intel_display *display, plane->capture_error = ivb_sprite_capture_error; plane->get_hw_state = ivb_sprite_get_hw_state; plane->check_plane = g4x_sprite_check; + plane->surf_offset = i965_plane_surf_offset; if (display->platform.broadwell || display->platform.haswell) { plane->max_stride = hsw_sprite_max_stride; @@ -1673,6 +1663,7 @@ intel_sprite_plane_create(struct intel_display *display, plane->capture_error = g4x_sprite_capture_error; plane->get_hw_state = g4x_sprite_get_hw_state; plane->check_plane = g4x_sprite_check; + plane->surf_offset = i965_plane_surf_offset; plane->max_stride = g4x_sprite_max_stride; plane->min_alignment = g4x_sprite_min_alignment; plane->min_cdclk = g4x_sprite_min_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 3bc57579fe53..c4a5601c5107 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -3,6 +3,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/iopoll.h> + #include <drm/drm_print.h> #include "i915_reg.h" @@ -23,10 +25,6 @@ #include "intel_modeset_lock.h" #include "intel_tc.h" -#define DP_PIN_ASSIGNMENT_C 0x3 -#define DP_PIN_ASSIGNMENT_D 0x4 -#define DP_PIN_ASSIGNMENT_E 0x5 - enum tc_port_mode { TC_PORT_DISCONNECTED, TC_PORT_TBT_ALT, @@ -65,7 +63,9 @@ struct intel_tc_port { enum tc_port_mode mode; enum tc_port_mode init_mode; enum phy_fia phy_fia; + enum intel_tc_pin_assignment pin_assignment; u8 phy_fia_idx; + u8 max_lane_count; }; static enum intel_display_power_domain @@ -251,6 +251,9 @@ tc_port_power_domain(struct intel_tc_port *tc) { enum tc_port tc_port = intel_encoder_to_tc(&tc->dig_port->base); + if (tc_port == TC_PORT_NONE) + return POWER_DOMAIN_INVALID; + return POWER_DOMAIN_PORT_DDI_LANES_TC1 + tc_port - TC_PORT_1; } @@ -263,13 +266,14 @@ assert_tc_port_power_enabled(struct intel_tc_port *tc) !intel_display_power_is_enabled(display, tc_port_power_domain(tc))); } -static u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) +static u32 get_lane_mask(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - struct intel_tc_port *tc = to_tc_port(dig_port); + struct intel_display *display = 
to_intel_display(tc->dig_port); + intel_wakeref_t wakeref; u32 lane_mask; - lane_mask = intel_de_read(display, PORT_TX_DFLEXDPSP(tc->phy_fia)); + with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) + lane_mask = intel_de_read(display, PORT_TX_DFLEXDPSP(tc->phy_fia)); drm_WARN_ON(display->drm, lane_mask == 0xffffffff); assert_tc_cold_blocked(tc); @@ -278,75 +282,87 @@ static u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(tc->phy_fia_idx); } -u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port) +static char pin_assignment_name(enum intel_tc_pin_assignment pin_assignment) { - struct intel_display *display = to_intel_display(dig_port); - struct intel_tc_port *tc = to_tc_port(dig_port); - u32 pin_mask; - - pin_mask = intel_de_read(display, PORT_TX_DFLEXPA1(tc->phy_fia)); - - drm_WARN_ON(display->drm, pin_mask == 0xffffffff); - assert_tc_cold_blocked(tc); + if (pin_assignment == INTEL_TC_PIN_ASSIGNMENT_NONE) + return '-'; - return (pin_mask & DP_PIN_ASSIGNMENT_MASK(tc->phy_fia_idx)) >> - DP_PIN_ASSIGNMENT_SHIFT(tc->phy_fia_idx); + return 'A' + pin_assignment - INTEL_TC_PIN_ASSIGNMENT_A; } -static int lnl_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) +static enum intel_tc_pin_assignment +get_pin_assignment(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - enum tc_port tc_port = intel_encoder_to_tc(&dig_port->base); + struct intel_display *display = to_intel_display(tc->dig_port); + enum tc_port tc_port = intel_encoder_to_tc(&tc->dig_port->base); + enum intel_tc_pin_assignment pin_assignment; intel_wakeref_t wakeref; - u32 val, pin_assignment; + i915_reg_t reg; + u32 mask; + u32 val; + + if (tc->mode == TC_PORT_TBT_ALT) + return INTEL_TC_PIN_ASSIGNMENT_NONE; + + if (DISPLAY_VER(display) >= 20) { + reg = TCSS_DDI_STATUS(tc_port); + mask = TCSS_DDI_STATUS_PIN_ASSIGNMENT_MASK; + } else { + reg = PORT_TX_DFLEXPA1(tc->phy_fia); + mask = DP_PIN_ASSIGNMENT_MASK(tc->phy_fia_idx); + } with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) - val = intel_de_read(display, TCSS_DDI_STATUS(tc_port)); + val = intel_de_read(display, reg); - pin_assignment = - REG_FIELD_GET(TCSS_DDI_STATUS_PIN_ASSIGNMENT_MASK, val); + drm_WARN_ON(display->drm, val == 0xffffffff); + assert_tc_cold_blocked(tc); + + pin_assignment = (val & mask) >> (ffs(mask) - 1); switch (pin_assignment) { + case INTEL_TC_PIN_ASSIGNMENT_A: + case INTEL_TC_PIN_ASSIGNMENT_B: + case INTEL_TC_PIN_ASSIGNMENT_F: + drm_WARN_ON(display->drm, DISPLAY_VER(display) > 11); + break; + case INTEL_TC_PIN_ASSIGNMENT_NONE: + case INTEL_TC_PIN_ASSIGNMENT_C: + case INTEL_TC_PIN_ASSIGNMENT_D: + case INTEL_TC_PIN_ASSIGNMENT_E: + break; default: MISSING_CASE(pin_assignment); - fallthrough; - case DP_PIN_ASSIGNMENT_D: - return 2; - case DP_PIN_ASSIGNMENT_C: - case DP_PIN_ASSIGNMENT_E: - return 4; } + + return pin_assignment; } -static int mtl_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) +static int mtl_get_max_lane_count(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - intel_wakeref_t wakeref; - u32 pin_mask; + enum intel_tc_pin_assignment pin_assignment; - with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) - pin_mask = intel_tc_port_get_pin_assignment_mask(dig_port); + pin_assignment = get_pin_assignment(tc); - switch (pin_mask) { + switch (pin_assignment) { + case INTEL_TC_PIN_ASSIGNMENT_NONE: + return 0; 
default: - MISSING_CASE(pin_mask); + MISSING_CASE(pin_assignment); fallthrough; - case DP_PIN_ASSIGNMENT_D: + case INTEL_TC_PIN_ASSIGNMENT_D: return 2; - case DP_PIN_ASSIGNMENT_C: - case DP_PIN_ASSIGNMENT_E: + case INTEL_TC_PIN_ASSIGNMENT_C: + case INTEL_TC_PIN_ASSIGNMENT_E: return 4; } } -static int intel_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) +static int icl_get_max_lane_count(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - intel_wakeref_t wakeref; u32 lane_mask = 0; - with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) - lane_mask = intel_tc_port_get_lane_mask(dig_port); + lane_mask = get_lane_mask(tc); switch (lane_mask) { default: @@ -365,23 +381,44 @@ static int intel_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) } } +static int get_max_lane_count(struct intel_tc_port *tc) +{ + struct intel_display *display = to_intel_display(tc->dig_port); + + if (tc->mode != TC_PORT_DP_ALT) + return 4; + + if (DISPLAY_VER(display) >= 14) + return mtl_get_max_lane_count(tc); + + return icl_get_max_lane_count(tc); +} + +static void read_pin_configuration(struct intel_tc_port *tc) +{ + tc->pin_assignment = get_pin_assignment(tc); + tc->max_lane_count = get_max_lane_count(tc); +} + int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) { - struct intel_display *display = to_intel_display(dig_port); struct intel_tc_port *tc = to_tc_port(dig_port); - if (!intel_encoder_is_tc(&dig_port->base) || tc->mode != TC_PORT_DP_ALT) + if (!intel_encoder_is_tc(&dig_port->base)) return 4; - assert_tc_cold_blocked(tc); + return tc->max_lane_count; +} - if (DISPLAY_VER(display) >= 20) - return lnl_tc_port_get_max_lane_count(dig_port); +enum intel_tc_pin_assignment +intel_tc_port_get_pin_assignment(struct intel_digital_port *dig_port) +{ + struct intel_tc_port *tc = to_tc_port(dig_port); - if (DISPLAY_VER(display) >= 14) - return mtl_tc_port_get_max_lane_count(dig_port); + if (!intel_encoder_is_tc(&dig_port->base)) + return INTEL_TC_PIN_ASSIGNMENT_NONE; - return intel_tc_port_get_max_lane_count(dig_port); + return tc->pin_assignment; } void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, @@ -596,9 +633,12 @@ static void icl_tc_phy_get_hw_state(struct intel_tc_port *tc) tc_cold_wref = __tc_cold_block(tc, &domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + __tc_cold_unblock(tc, domain, tc_cold_wref); } @@ -656,8 +696,11 @@ static bool icl_tc_phy_connect(struct intel_tc_port *tc, tc->lock_wakeref = tc_cold_block(tc); - if (tc->mode == TC_PORT_TBT_ALT) + if (tc->mode == TC_PORT_TBT_ALT) { + read_pin_configuration(tc); + return true; + } if ((!tc_phy_is_ready(tc) || !icl_tc_phy_take_ownership(tc, true)) && @@ -668,6 +711,7 @@ static bool icl_tc_phy_connect(struct intel_tc_port *tc, goto out_unblock_tc_cold; } + read_pin_configuration(tc); if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_release_phy; @@ -858,9 +902,12 @@ static void adlp_tc_phy_get_hw_state(struct intel_tc_port *tc) port_wakeref = intel_display_power_get(display, port_power_domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + intel_display_power_put(display, port_power_domain, port_wakeref); } @@ 
-873,6 +920,9 @@ static bool adlp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) if (tc->mode == TC_PORT_TBT_ALT) { tc->lock_wakeref = tc_cold_block(tc); + + read_pin_configuration(tc); + return true; } @@ -894,6 +944,8 @@ static bool adlp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_unblock_tc_cold; @@ -1000,8 +1052,13 @@ static bool xelpdp_tc_phy_wait_for_tcss_power(struct intel_tc_port *tc, bool enabled) { struct intel_display *display = to_intel_display(tc->dig_port); + bool is_enabled; + int ret; - if (wait_for(xelpdp_tc_phy_tcss_power_is_enabled(tc) == enabled, 5)) { + ret = poll_timeout_us(is_enabled = xelpdp_tc_phy_tcss_power_is_enabled(tc), + is_enabled == enabled, + 200, 5000, false); + if (ret) { drm_dbg_kms(display->drm, "Port %s: timeout waiting for TCSS power to get %s\n", str_enabled_disabled(enabled), @@ -1124,9 +1181,18 @@ static void xelpdp_tc_phy_get_hw_state(struct intel_tc_port *tc) tc_cold_wref = __tc_cold_block(tc, &domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + /* + * Set a valid lane count value for a DP-alt sink which got + * disconnected. The driver can only disable the output on this PHY. + */ + if (tc->max_lane_count == 0) + tc->max_lane_count = 4; + } + drm_WARN_ON(display->drm, (tc->mode == TC_PORT_DP_ALT || tc->mode == TC_PORT_LEGACY) && !xelpdp_tc_phy_tcss_power_is_enabled(tc)); @@ -1138,14 +1204,19 @@ static bool xelpdp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) { tc->lock_wakeref = tc_cold_block(tc); - if (tc->mode == TC_PORT_TBT_ALT) + if (tc->mode == TC_PORT_TBT_ALT) { + read_pin_configuration(tc); + return true; + } if (!xelpdp_tc_phy_enable_tcss_power(tc, true)) goto out_unblock_tccold; xelpdp_tc_phy_take_ownership(tc, true); + read_pin_configuration(tc); + if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_release_phy; @@ -1226,14 +1297,19 @@ static void tc_phy_get_hw_state(struct intel_tc_port *tc) tc->phy_ops->get_hw_state(tc); } -static bool tc_phy_is_ready_and_owned(struct intel_tc_port *tc, - bool phy_is_ready, bool phy_is_owned) +/* Is the PHY owned by display i.e. is it in legacy or DP-alt mode? 
*/ +static bool tc_phy_owned_by_display(struct intel_tc_port *tc, + bool phy_is_ready, bool phy_is_owned) { struct intel_display *display = to_intel_display(tc->dig_port); - drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); + if (DISPLAY_VER(display) < 20) { + drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); - return phy_is_ready && phy_is_owned; + return phy_is_ready && phy_is_owned; + } else { + return phy_is_owned; + } } static bool tc_phy_is_connected(struct intel_tc_port *tc, @@ -1244,7 +1320,7 @@ static bool tc_phy_is_connected(struct intel_tc_port *tc, bool phy_is_owned = tc_phy_is_owned(tc); bool is_connected; - if (tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) + if (tc_phy_owned_by_display(tc, phy_is_ready, phy_is_owned)) is_connected = port_pll_type == ICL_PORT_DPLL_MG_PHY; else is_connected = port_pll_type == ICL_PORT_DPLL_DEFAULT; @@ -1263,8 +1339,13 @@ static bool tc_phy_is_connected(struct intel_tc_port *tc, static bool tc_phy_wait_for_ready(struct intel_tc_port *tc) { struct intel_display *display = to_intel_display(tc->dig_port); + bool is_ready; + int ret; - if (wait_for(tc_phy_is_ready(tc), 500)) { + ret = poll_timeout_us(is_ready = tc_phy_is_ready(tc), + is_ready, + 1000, 500 * 1000, false); + if (ret) { drm_err(display->drm, "Port %s: timeout waiting for PHY ready\n", tc->port_name); @@ -1352,7 +1433,7 @@ tc_phy_get_current_mode(struct intel_tc_port *tc) phy_is_ready = tc_phy_is_ready(tc); phy_is_owned = tc_phy_is_owned(tc); - if (!tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) { + if (!tc_phy_owned_by_display(tc, phy_is_ready, phy_is_owned)) { mode = get_tc_mode_in_phy_not_owned_state(tc, live_mode); } else { drm_WARN_ON(display->drm, live_mode == TC_PORT_TBT_ALT); @@ -1441,21 +1522,24 @@ static void intel_tc_port_reset_mode(struct intel_tc_port *tc, intel_display_power_flush_work(display); if (!intel_tc_cold_requires_aux_pw(dig_port)) { enum intel_display_power_domain aux_domain; - bool aux_powered; aux_domain = intel_aux_power_domain(dig_port); - aux_powered = intel_display_power_is_enabled(display, aux_domain); - drm_WARN_ON(display->drm, aux_powered); + if (intel_display_power_is_enabled(display, aux_domain)) + drm_dbg_kms(display->drm, "Port %s: AUX unexpectedly powered\n", + tc->port_name); } tc_phy_disconnect(tc); if (!force_disconnect) tc_phy_connect(tc, required_lanes); - drm_dbg_kms(display->drm, "Port %s: TC port mode reset (%s -> %s)\n", + drm_dbg_kms(display->drm, + "Port %s: TC port mode reset (%s -> %s) pin assignment: %c max lanes: %d\n", tc->port_name, tc_port_mode_name(old_tc_mode), - tc_port_mode_name(tc->mode)); + tc_port_mode_name(tc->mode), + pin_assignment_name(tc->pin_assignment), + tc->max_lane_count); } static bool intel_tc_port_needs_reset(struct intel_tc_port *tc) @@ -1610,9 +1694,11 @@ void intel_tc_port_sanitize_mode(struct intel_digital_port *dig_port, __intel_tc_port_put_link(tc); } - drm_dbg_kms(display->drm, "Port %s: sanitize mode (%s)\n", + drm_dbg_kms(display->drm, "Port %s: sanitize mode (%s) pin assignment: %c max lanes: %d\n", tc->port_name, - tc_port_mode_name(tc->mode)); + tc_port_mode_name(tc->mode), + pin_assignment_name(tc->pin_assignment), + tc->max_lane_count); mutex_unlock(&tc->lock); } diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h index 26c4265368c1..fff8b96e4972 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.h +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -12,6 +12,75 @@ struct intel_crtc_state; struct intel_digital_port; 
struct intel_encoder;
+/*
+ * The following enum values must stay fixed, as they match the corresponding
+ * pin assignment fields in the PORT_TX_DFLEXPA1 and TCSS_DDI_STATUS registers.
+ */
+enum intel_tc_pin_assignment {                  /* Lanes (a)  Signal/    Cable    Notes   */
+                                                /* DP   USB   Rate (b)   type             */
+	INTEL_TC_PIN_ASSIGNMENT_NONE = 0,       /*  4    -       -          -      (c)     */
+	INTEL_TC_PIN_ASSIGNMENT_A,              /* 2/4   0      GEN2      TC->TC   (d,e)   */
+	INTEL_TC_PIN_ASSIGNMENT_B,              /* 1/2   1      GEN2      TC->TC   (d,f,g) */
+	INTEL_TC_PIN_ASSIGNMENT_C,              /*  4    0      DP2       TC->TC   (h)     */
+	INTEL_TC_PIN_ASSIGNMENT_D,              /*  2    1      DP2       TC->TC   (h,g)   */
+	INTEL_TC_PIN_ASSIGNMENT_E,              /*  4    0      DP2       TC->DP           */
+	INTEL_TC_PIN_ASSIGNMENT_F,              /*  2    1     GEN1/DP1   TC->DP   (d,g,i) */
+	/*
+	 * (a) - DP unidirectional lanes, each lane using 1 differential signal
+	 *       pair.
+	 *     - USB SuperSpeed bidirectional lane, using 2 differential (TX and
+	 *       RX) signal pairs.
+	 *     - USB 2.0 (HighSpeed) unidirectional lane, using 1 differential
+	 *       signal pair. Not indicated, this lane is always present on pin
+	 *       assignments A-D and never present on pin assignments E/F.
+	 * (b) - GEN1: USB 3.1 GEN1 bit rate (5 Gbps) and signaling. This
+	 *       is used for transferring only a USB stream.
+	 *     - GEN2: USB 3.1 GEN2 bit rate (10 Gbps) and signaling. This
+	 *       allows transferring an HBR3 (8.1 Gbps) DP stream.
+	 *     - DP1: Display Port signaling defined by the DP v1.3 Standard,
+	 *       with a maximum bit rate of HBR3.
+	 *     - DP2: Display Port signaling defined by the DP v2.1 Standard,
+	 *       with a maximum bit rate defined by the DP Alt Mode
+	 *       v2.1a Standard depending on the cable type as follows:
+	 *       - Passive (Full-Featured) USB 3.2 GEN1
+	 *         TC->TC cables (CC3G1-X)                        : UHBR10
+	 *       - Passive (Full-Featured) USB 3.2/4 GEN2 and
+	 *         Thunderbolt Alt Mode GEN2
+	 *         TC->TC cables (CC3G2-X)   all                  : UHBR10
+	 *                                   DP54 logo            : UHBR13.5
+	 *       - Passive (Full-Featured) USB4 GEN3+ and
+	 *         Thunderbolt Alt Mode GEN3+
+	 *         TC->TC cables (CC4G3-X)   all                  : UHBR13.5
+	 *                                   DP80 logo            : UHBR20
+	 *       - Active Re-Timed or
+	 *         Active Linear Re-driven (LRD)
+	 *         USB3.2 GEN1/2 and USB4 GEN2+
+	 *         TC->TC cables             all                  : HBR3
+	 *                                   with DP_BR CTS       : UHBR10
+	 *                                   DP54 logo            : UHBR13.5
+	 *                                   DP80 logo            : UHBR20
+	 *       - Passive/Active Re-Timed or
+	 *         Active Linear Re-driven (LRD)
+	 *         TC->DP cables   with DP_BR CTS/DP8K logo       : HBR3
+	 *                         with DP_BR CTS                 : UHBR10
+	 *                         DP54 logo                      : UHBR13.5
+	 *                         DP80 logo                      : UHBR20
+	 * (c) Used in TBT-alt/legacy modes and on LNL+ after the sink
+	 *     disconnected in DP-alt mode.
+	 * (d) Only defined by the DP Alt Standard v1.0a, deprecated by v1.0b,
+	 *     only supported on ICL.
+	 * (e) GEN2 passive 1 m cable: 4 DP lanes, GEN2 active cable: 2 DP lanes.
+	 * (f) GEN2 passive 1 m cable: 2 DP lanes, GEN2 active cable: 1 DP lane.
+	 * (g) These pin assignments are also referred to as (USB/DP)
+	 *     multifunction or Multifunction Display Port (MFD) modes.
+	 * (h) Also used where one end of the cable is a captive connector,
+	 *     attached to a DP->HDMI/DVI/VGA converter.
+	 * (i) The DP end of the cable is a captive connector attached to a
+	 *     (DP/USB) multifunction dock as defined by the DockPort v1.0a
+	 *     specification.
+ */ +}; + bool intel_tc_port_in_tbt_alt_mode(struct intel_digital_port *dig_port); bool intel_tc_port_in_dp_alt_mode(struct intel_digital_port *dig_port); bool intel_tc_port_in_legacy_mode(struct intel_digital_port *dig_port); @@ -19,7 +88,8 @@ bool intel_tc_port_handles_hpd_glitches(struct intel_digital_port *dig_port); bool intel_tc_port_connected(struct intel_encoder *encoder); -u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); +enum intel_tc_pin_assignment +intel_tc_port_get_pin_assignment(struct intel_digital_port *dig_port); int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port); void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, int required_lanes); diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 70ba7aa26bf4..c15234c1d96e 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -3,9 +3,12 @@ * Copyright © 2022-2023 Intel Corporation */ +#include <linux/iopoll.h> + #include <drm/drm_vblank.h> #include "i915_drv.h" +#include "i915_utils.h" #include "intel_color.h" #include "intel_crtc.h" #include "intel_de.h" @@ -492,9 +495,14 @@ static void wait_for_pipe_scanline_moving(struct intel_crtc *crtc, bool state) { struct intel_display *display = to_intel_display(crtc); enum pipe pipe = crtc->pipe; + bool is_moving; + int ret; /* Wait for the display line to settle/start moving */ - if (wait_for(pipe_scanline_is_moving(display, pipe) == state, 100)) + ret = poll_timeout_us(is_moving = pipe_scanline_is_moving(display, pipe), + is_moving == state, + 500, 100 * 1000, false); + if (ret) drm_err(display->drm, "pipe %c scanline %s wait timed out\n", pipe_name(pipe), str_on_off(state)); @@ -724,9 +732,9 @@ int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) break; if (!timeout) { - drm_err(display->drm, - "Potential atomic update failure on pipe %c\n", - pipe_name(crtc->pipe)); + drm_dbg_kms(display->drm, + "Potential atomic update failure on pipe %c\n", + pipe_name(crtc->pipe)); break; } diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 92c04811aa28..70e31520c560 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -37,7 +37,7 @@ #ifndef _INTEL_VBT_DEFS_H_ #define _INTEL_VBT_DEFS_H_ -#include "intel_bios.h" +#include "intel_dsi_vbt_defs.h" /* EDID derived structures */ struct bdb_edid_pnp_id { @@ -437,6 +437,22 @@ enum vbt_gmbus_ddi { #define BDB_230_VBT_DP_MAX_LINK_RATE_UHBR13P5 6 #define BDB_230_VBT_DP_MAX_LINK_RATE_UHBR20 7 +/* EDP link rate 263+ */ +#define BDB_263_VBT_EDP_LINK_RATE_1_62 BIT_U32(0) +#define BDB_263_VBT_EDP_LINK_RATE_2_16 BIT_U32(1) +#define BDB_263_VBT_EDP_LINK_RATE_2_43 BIT_U32(2) +#define BDB_263_VBT_EDP_LINK_RATE_2_7 BIT_U32(3) +#define BDB_263_VBT_EDP_LINK_RATE_3_24 BIT_U32(4) +#define BDB_263_VBT_EDP_LINK_RATE_4_32 BIT_U32(5) +#define BDB_263_VBT_EDP_LINK_RATE_5_4 BIT_U32(6) +#define BDB_263_VBT_EDP_LINK_RATE_6_75 BIT_U32(7) +#define BDB_263_VBT_EDP_LINK_RATE_8_1 BIT_U32(8) +#define BDB_263_VBT_EDP_LINK_RATE_10 BIT_U32(9) +#define BDB_263_VBT_EDP_LINK_RATE_13_5 BIT_U32(10) +#define BDB_263_VBT_EDP_LINK_RATE_20 BIT_U32(11) +#define BDB_263_VBT_EDP_NUM_RATES 12 +#define BDB_263_VBT_EDP_RATES_MASK GENMASK(BDB_263_VBT_EDP_NUM_RATES - 1, 0) + /* * The child device config, aka the display device data structure, provides a * description of a port and its 
configuration on the platform. @@ -547,6 +563,8 @@ struct child_device_config { u8 dp_max_link_rate:3; /* 216+ */ u8 dp_max_link_rate_reserved:5; /* 216+ */ u8 efp_index; /* 256+ */ + u32 edp_data_rate_override:12; /* 263+ */ + u32 edp_data_rate_override_reserved:20; /* 263+ */ } __packed; struct bdb_general_definitions { diff --git a/drivers/gpu/drm/i915/display/intel_wm.c b/drivers/gpu/drm/i915/display/intel_wm.c index bba82e888db2..f887a664fe22 100644 --- a/drivers/gpu/drm/i915/display/intel_wm.c +++ b/drivers/gpu/drm/i915/display/intel_wm.c @@ -5,7 +5,6 @@ #include <linux/debugfs.h> -#include <drm/drm_file.h> #include <drm/drm_print.h> #include "i9xx_wm.h" @@ -390,15 +389,15 @@ static const struct file_operations i915_cur_wm_latency_fops = { void intel_wm_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; - debugfs_create_file("i915_pri_wm_latency", 0644, minor->debugfs_root, + debugfs_create_file("i915_pri_wm_latency", 0644, debugfs_root, display, &i915_pri_wm_latency_fops); - debugfs_create_file("i915_spr_wm_latency", 0644, minor->debugfs_root, + debugfs_create_file("i915_spr_wm_latency", 0644, debugfs_root, display, &i915_spr_wm_latency_fops); - debugfs_create_file("i915_cur_wm_latency", 0644, minor->debugfs_root, + debugfs_create_file("i915_cur_wm_latency", 0644, debugfs_root, display, &i915_cur_wm_latency_fops); skl_watermark_debugfs_register(display); diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index d77798499c57..c6cccf170ff1 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -10,6 +10,7 @@ #include "intel_display_regs.h" #include "intel_display_trace.h" #include "intel_display_types.h" +#include "intel_display_wa.h" #include "intel_fb.h" #include "skl_scaler.h" #include "skl_universal_plane.h" @@ -91,11 +92,9 @@ static void skl_scaler_min_src_size(const struct drm_format_info *format, } } -static void skl_scaler_max_src_size(struct intel_crtc *crtc, +static void skl_scaler_max_src_size(struct intel_display *display, int *max_w, int *max_h) { - struct intel_display *display = to_intel_display(crtc); - if (DISPLAY_VER(display) >= 14) { *max_w = 4096; *max_h = 8192; @@ -134,6 +133,23 @@ static void skl_scaler_max_dst_size(struct intel_crtc *crtc, } } +enum drm_mode_status +skl_scaler_mode_valid(struct intel_display *display, + const struct drm_display_mode *mode, + enum intel_output_format output_format, + int num_joined_pipes) +{ + int max_h, max_w; + + if (num_joined_pipes < 2 && output_format == INTEL_OUTPUT_FORMAT_YCBCR420) { + skl_scaler_max_src_size(display, &max_w, &max_h); + if (mode->hdisplay > max_h) + return MODE_NO_420; + } + + return MODE_OK; +} + static int skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, unsigned int scaler_user, int *scaler_id, @@ -201,7 +217,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, } skl_scaler_min_src_size(format, modifier, &min_src_w, &min_src_h); - skl_scaler_max_src_size(crtc, &max_src_w, &max_src_h); + skl_scaler_max_src_size(display, &max_src_w, &max_src_h); skl_scaler_min_dst_size(&min_dst_w, &min_dst_h); skl_scaler_max_dst_size(crtc, &max_dst_w, &max_dst_h); @@ -747,6 +763,9 @@ void skl_pfit_enable(const struct intel_crtc_state *crtc_state) crtc_state->scaler_state.scaler_id < 0)) return; + if (intel_display_wa(display, 14011503117)) + 
adl_scaler_ecc_mask(crtc_state); + drm_rect_init(&src, 0, 0, drm_rect_width(&crtc_state->pipe_src) << 16, drm_rect_height(&crtc_state->pipe_src) << 16); @@ -923,3 +942,29 @@ void skl_scaler_get_config(struct intel_crtc_state *crtc_state) else scaler_state->scaler_users &= ~(1 << SKL_CRTC_INDEX); } + +void adl_scaler_ecc_mask(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (!crtc_state->pch_pfit.enabled) + return; + + intel_de_write(display, XELPD_DISPLAY_ERR_FATAL_MASK, ~0); +} + +void adl_scaler_ecc_unmask(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct intel_crtc_scaler_state *scaler_state = + &crtc_state->scaler_state; + + if (scaler_state->scaler_id < 0) + return; + + intel_de_write_fw(display, + SKL_PS_ECC_STAT(crtc->pipe, scaler_state->scaler_id), + 1); + intel_de_write(display, XELPD_DISPLAY_ERR_FATAL_MASK, 0); +} diff --git a/drivers/gpu/drm/i915/display/skl_scaler.h b/drivers/gpu/drm/i915/display/skl_scaler.h index 355ea15260ca..12a19016c5f6 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.h +++ b/drivers/gpu/drm/i915/display/skl_scaler.h @@ -5,10 +5,14 @@ #ifndef INTEL_SCALER_H #define INTEL_SCALER_H +enum drm_mode_status; +struct drm_display_mode; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; +struct intel_display; struct intel_dsb; +enum intel_output_format; struct intel_plane; struct intel_plane_state; @@ -32,4 +36,13 @@ void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state); void skl_scaler_get_config(struct intel_crtc_state *crtc_state); +enum drm_mode_status +skl_scaler_mode_valid(struct intel_display *display, + const struct drm_display_mode *mode, + enum intel_output_format output_format, + int num_joined_pipes); + +void adl_scaler_ecc_mask(const struct intel_crtc_state *crtc_state); + +void adl_scaler_ecc_unmask(const struct intel_crtc_state *crtc_state); #endif diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index e20972ddfa09..950dc79dbdd4 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -10,6 +10,7 @@ #include "pxp/intel_pxp.h" #include "i915_drv.h" +#include "i915_utils.h" #include "intel_bo.h" #include "intel_de.h" #include "intel_display_irq.h" @@ -1166,8 +1167,7 @@ static u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) return plane_ctl; } -static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 skl_plane_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1225,8 +1225,7 @@ static u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state) return plane_color_ctl; } -static u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 glk_plane_color_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1271,12 +1270,6 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, u32 offset = plane_state->view.color_plane[color_plane].offset; if 
(intel_fb_uses_dpt(fb)) { - /* - * The DPT object contains only one vma, so the VMA's offset - * within the DPT is always 0. - */ - drm_WARN_ON(display->drm, plane_state->dpt_vma && - intel_dpt_offset(plane_state->dpt_vma)); drm_WARN_ON(display->drm, offset & 0x1fffff); return offset >> 9; } else { @@ -1285,13 +1278,20 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, } } -static u32 skl_plane_surf(const struct intel_plane_state *plane_state, - int color_plane) +static int icl_plane_color_plane(const struct intel_plane_state *plane_state) +{ + if (plane_state->planar_linked_plane && !plane_state->is_y_plane) + return 1; + else + return 0; +} + +static u32 skl_plane_surf_offset(const struct intel_plane_state *plane_state) { + int color_plane = icl_plane_color_plane(plane_state); u32 plane_surf; - plane_surf = intel_plane_ggtt_offset(plane_state) + - skl_surf_address(plane_state, color_plane); + plane_surf = skl_surf_address(plane_state, color_plane); if (plane_state->decrypt) plane_surf |= PLANE_SURF_DECRYPT; @@ -1373,14 +1373,6 @@ static void icl_plane_csc_load_black(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PLANE_CSC_POSTOFF(pipe, plane_id, 2), 0); } -static int icl_plane_color_plane(const struct intel_plane_state *plane_state) -{ - if (plane_state->planar_linked_plane && !plane_state->is_y_plane) - return 1; - else - return 0; -} - static void skl_plane_update_noarm(struct intel_dsb *dsb, struct intel_plane *plane, @@ -1476,7 +1468,7 @@ skl_plane_update_arm(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), plane_ctl); intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), - skl_plane_surf(plane_state, 0)); + plane_state->surf); } static void icl_plane_update_sel_fetch_noarm(struct intel_dsb *dsb, @@ -1632,7 +1624,6 @@ icl_plane_update_arm(struct intel_dsb *dsb, struct intel_display *display = to_intel_display(plane); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; - int color_plane = icl_plane_color_plane(plane_state); u32 plane_ctl; plane_ctl = plane_state->ctl | @@ -1658,7 +1649,7 @@ icl_plane_update_arm(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), plane_ctl); intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), - skl_plane_surf(plane_state, color_plane)); + plane_state->surf); } static void skl_plane_capture_error(struct intel_crtc *crtc, @@ -1682,10 +1673,10 @@ skl_plane_async_flip(struct intel_dsb *dsb, struct intel_display *display = to_intel_display(plane); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; - u32 plane_ctl = plane_state->ctl, plane_surf; + u32 plane_ctl = plane_state->ctl; + u32 plane_surf = plane_state->surf; plane_ctl |= skl_plane_ctl_crtc(crtc_state); - plane_surf = skl_plane_surf(plane_state, 0); if (async_flip) { if (DISPLAY_VER(display) >= 30) @@ -2363,11 +2354,10 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, plane_state->damage = DRM_RECT_INIT(0, 0, 0, 0); } - plane_state->ctl = skl_plane_ctl(crtc_state, plane_state); + plane_state->ctl = skl_plane_ctl(plane_state); if (DISPLAY_VER(display) >= 10) - plane_state->color_ctl = glk_plane_color_ctl(crtc_state, - plane_state); + plane_state->color_ctl = glk_plane_color_ctl(plane_state); if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) && icl_is_hdr_plane(display, plane->id)) @@ -2814,7 +2804,7 @@ static void skl_disable_tiling(struct intel_plane *plane) intel_de_write_fw(display, PLANE_CTL(plane->pipe, plane->id), 
plane_ctl); intel_de_write_fw(display, PLANE_SURF(plane->pipe, plane->id), - skl_plane_surf(state, 0)); + state->surf); } struct intel_plane * @@ -2865,6 +2855,8 @@ skl_universal_plane_create(struct intel_display *display, } plane->disable_tiling = skl_disable_tiling; + plane->surf_offset = skl_plane_surf_offset; + if (DISPLAY_VER(display) >= 13) plane->max_stride = adl_plane_max_stride; else @@ -3191,21 +3183,18 @@ bool skl_fixup_initial_plane_config(struct intel_crtc *crtc, to_intel_plane_state(plane->base.state); enum plane_id plane_id = plane->id; enum pipe pipe = crtc->pipe; - u32 base; if (!plane_state->uapi.visible) return false; - base = intel_plane_ggtt_offset(plane_state); - /* * We may have moved the surface to a different * part of ggtt, make the plane aware of that. */ - if (plane_config->base == base) + if (plane_config->base == plane_state->surf) return false; - intel_de_write(display, PLANE_SURF(pipe, plane_id), base); + intel_de_write(display, PLANE_SURF(pipe, plane_id), plane_state->surf); return true; } diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 222c069fdadb..d74cbb43ae6f 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -6,7 +6,6 @@ #include <linux/debugfs.h> #include <drm/drm_blend.h> -#include <drm/drm_file.h> #include <drm/drm_print.h> #include "soc/intel_dram.h" @@ -1389,7 +1388,7 @@ skl_allocate_plane_ddb(struct skl_plane_ddb_iter *iter, { u16 size, extra = 0; - if (data_rate) { + if (data_rate && iter->data_rate) { extra = min_t(u16, iter->size, DIV64_U64_ROUND_UP(iter->size * data_rate, iter->data_rate)); @@ -2273,6 +2272,11 @@ static int skl_max_wm0_lines(const struct intel_crtc_state *crtc_state) return wm0_lines; } +/* + * TODO: In case we use PKG_C_LATENCY to allow C-states when the delayed vblank + * size is too small for the package C exit latency we need to notify PSR about + * the scenario to apply Wa_16025596647. + */ static int skl_max_wm_level_for_vblank(struct intel_crtc_state *crtc_state, int wm0_lines) { @@ -3205,12 +3209,12 @@ adjust_wm_latency(struct intel_display *display, } /* - * WA Level-0 adjustment for 16GB DIMMs: SKL+ + * WA Level-0 adjustment for 16Gb DIMMs: SKL+ * If we could not get dimm info enable this WA to prevent from - * any underrun. If not able to get Dimm info assume 16GB dimm + * any underrun. If not able to get DIMM info assume 16Gb DIMM * to avoid any underrun. 
*/ - if (!display->platform.dg2 && dram_info->wm_lv_0_adjust_needed) + if (!display->platform.dg2 && dram_info->has_16gb_dimms) wm[0] += 1; } @@ -4033,14 +4037,14 @@ DEFINE_SHOW_ATTRIBUTE(intel_sagv_status); void skl_watermark_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; if (HAS_IPC(display)) - debugfs_create_file("i915_ipc_status", 0644, minor->debugfs_root, + debugfs_create_file("i915_ipc_status", 0644, debugfs_root, display, &skl_watermark_ipc_status_fops); if (HAS_SAGV(display)) - debugfs_create_file("i915_sagv_status", 0444, minor->debugfs_root, + debugfs_create_file("i915_sagv_status", 0444, debugfs_root, display, &intel_sagv_status_fops); } diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 6d9f3312de7e..c9a53fde79c4 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -761,7 +761,7 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, if (display->platform.valleyview || display->platform.cherryview) { /* Disable DPOunit clock gating, can stall pipe */ - intel_de_rmw(display, DSPCLK_GATE_D(display), + intel_de_rmw(display, VLV_DSPCLK_GATE_D, 0, DPOUNIT_CLOCK_GATE_DISABLE); } @@ -918,7 +918,7 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, } else { vlv_dsi_pll_disable(encoder); - intel_de_rmw(display, DSPCLK_GATE_D(display), + intel_de_rmw(display, VLV_DSPCLK_GATE_D, DPOUNIT_CLOCK_GATE_DISABLE, 0); } diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index d42b61e6f076..f078b9cda96c 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -25,12 +25,12 @@ * Yogesh Mohan Marimuthu <yogesh.mohan.marimuthu@intel.com> */ +#include <linux/iopoll.h> #include <linux/kernel.h> #include <linux/string_helpers.h> #include <drm/drm_print.h> -#include "i915_utils.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" @@ -142,11 +142,9 @@ static int vlv_dsi_pclk(struct intel_encoder *encoder, pll_div &= DSI_PLL_M1_DIV_MASK; pll_div = pll_div >> DSI_PLL_M1_DIV_SHIFT; - while (pll_ctl) { - pll_ctl = pll_ctl >> 1; - p++; - } - p--; + p = fls(pll_ctl); + if (p) + p--; if (!p) { drm_err(display->drm, "wrong P1 divisor\n"); @@ -216,6 +214,8 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *config) { struct intel_display *display = to_intel_display(encoder); + u32 val; + int ret; drm_dbg_kms(display->drm, "\n"); @@ -233,9 +233,10 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder, vlv_cck_write(display->drm, CCK_REG_DSI_PLL_CONTROL, config->dsi_pll.ctrl); - if (wait_for(vlv_cck_read(display->drm, CCK_REG_DSI_PLL_CONTROL) & - DSI_PLL_LOCK, 20)) { - + ret = poll_timeout_us(val = vlv_cck_read(display->drm, CCK_REG_DSI_PLL_CONTROL), + val & DSI_PLL_LOCK, + 500, 20 * 1000, false); + if (ret) { vlv_cck_put(display->drm); drm_err(display->drm, "DSI PLL lock failed\n"); return; @@ -262,6 +263,11 @@ void vlv_dsi_pll_disable(struct intel_encoder *encoder) vlv_cck_put(display->drm); } +static bool has_dsic_clock(struct intel_display *display) +{ + return display->platform.broxton; +} + bool bxt_dsi_pll_is_enabled(struct intel_display *display) { bool enabled; @@ -284,7 +290,7 @@ bool bxt_dsi_pll_is_enabled(struct intel_display *display) * causes a system hang. 
*/ val = intel_de_read(display, BXT_DSI_PLL_CTL); - if (display->platform.geminilake) { + if (!has_dsic_clock(display)) { if (!(val & BXT_DSIA_16X_MASK)) { drm_dbg_kms(display->drm, "Invalid PLL divider (%08x)\n", val); @@ -358,6 +364,8 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, u32 pclk; config->dsi_pll.ctrl = intel_de_read(display, BXT_DSI_PLL_CTL); + if (!has_dsic_clock(display)) + config->dsi_pll.ctrl &= ~BXT_DSIC_16X_MASK; pclk = bxt_dsi_pclk(encoder, config); @@ -514,7 +522,9 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder, * Spec says both have to be programmed, even if one is not getting * used. Configure MIPI_CLOCK_CTL dividers in modeset */ - config->dsi_pll.ctrl = dsi_ratio | BXT_DSIA_16X_BY2 | BXT_DSIC_16X_BY2; + config->dsi_pll.ctrl = dsi_ratio | BXT_DSIA_16X_BY2; + if (has_dsic_clock(display)) + config->dsi_pll.ctrl |= BXT_DSIC_16X_BY2; /* As per recommendation from hardware team, * Prog PVD ratio =1 if dsi ratio <= 50 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 15835952352e..ed6599694835 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2158,18 +2158,12 @@ static int set_context_image(struct i915_gem_context *ctx, goto out_ce; } - state = kmalloc(ce->engine->context_size, GFP_KERNEL); - if (!state) { - ret = -ENOMEM; + state = memdup_user(u64_to_user_ptr(user.image), ce->engine->context_size); + if (IS_ERR(state)) { + ret = PTR_ERR(state); goto out_ce; } - if (copy_from_user(state, u64_to_user_ptr(user.image), - ce->engine->context_size)) { - ret = -EFAULT; - goto out_state; - } - shmem_state = shmem_create_from_data(ce->engine->name, state, ce->engine->context_size); if (IS_ERR(shmem_state)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index ca7e9216934a..f243f8a5215d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1382,8 +1382,9 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) */ if (flushes & CLFLUSH_AFTER) drm_clflush_virt_range(addr, sizeof(*addr)); - } else + } else { *addr = value; + } } static u64 @@ -1567,36 +1568,36 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) do { u64 offset = eb_relocate_entry(eb, ev, r); - if (likely(offset == 0)) { - } else if ((s64)offset < 0) { + if (likely(offset == 0)) + continue; + + if ((s64)offset < 0) { remain = (int)offset; goto out; - } else { - /* - * Note that reporting an error now - * leaves everything in an inconsistent - * state as we have *already* changed - * the relocation value inside the - * object. As we have not changed the - * reloc.presumed_offset or will not - * change the execobject.offset, on the - * call we may not rewrite the value - * inside the object, leaving it - * dangling and causing a GPU hang. Unless - * userspace dynamically rebuilds the - * relocations on each execbuf rather than - * presume a static tree. - * - * We did previously check if the relocations - * were writable (access_ok), an error now - * would be a strange race with mprotect, - * having already demonstrated that we - * can read from this userspace address. 
- */ - offset = gen8_canonical_addr(offset & ~UPDATE); - __put_user(offset, - &urelocs[r - stack].presumed_offset); } + /* + * Note that reporting an error now + * leaves everything in an inconsistent + * state as we have *already* changed + * the relocation value inside the + * object. As we have not changed the + * reloc.presumed_offset or will not + * change the execobject.offset, on the + * call we may not rewrite the value + * inside the object, leaving it + * dangling and causing a GPU hang. Unless + * userspace dynamically rebuilds the + * relocations on each execbuf rather than + * presume a static tree. + * + * We did previously check if the relocations + * were writable (access_ok), an error now + * would be a strange race with mprotect, + * having already demonstrated that we + * can read from this userspace address. + */ + offset = gen8_canonical_addr(offset & ~UPDATE); + __put_user(offset, &urelocs[r - stack].presumed_offset); } while (r++, --count); urelocs += ARRAY_SIZE(stack); } while (remain); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 86d9d2fcb6a6..e747f5ed195e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -110,6 +110,7 @@ struct tiled_blits { static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915) { + struct intel_display *display = i915->display; int gen = GRAPHICS_VER(i915); /* XY_FAST_COPY_BLT does not exist on pre-gen9 platforms */ @@ -121,7 +122,7 @@ static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915) if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) return false; - return HAS_DISPLAY(i915); + return HAS_DISPLAY(display); } static bool fast_blit_ok(const struct blit_buffer *buf) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 9c3f17e51885..78734c404a6d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1096,32 +1096,20 @@ static int ___igt_mmap_migrate(struct drm_i915_private *i915, unsigned long addr, bool unfaultable) { - struct vm_area_struct *area; - int err = 0, i; + int i; pr_info("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr); - mmap_read_lock(current->mm); - area = vma_lookup(current->mm, addr); - mmap_read_unlock(current->mm); - if (!area) { - pr_err("%s: Did not create a vm_area_struct for the mmap\n", - obj->mm.region->name); - err = -EINVAL; - goto out_unmap; - } - for (i = 0; i < obj->base.size / sizeof(u32); i++) { u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); u32 x; if (get_user(x, ux)) { - err = -EFAULT; if (!unfaultable) { pr_err("%s: Unable to read from mmap, offset:%zd\n", obj->mm.region->name, i * sizeof(x)); - goto out_unmap; + return -EFAULT; } continue; @@ -1130,37 +1118,29 @@ static int ___igt_mmap_migrate(struct drm_i915_private *i915, if (unfaultable) { pr_err("%s: Faulted unmappable memory\n", obj->mm.region->name); - err = -EINVAL; - goto out_unmap; + return -EINVAL; } if (x != expand32(POISON_INUSE)) { pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", obj->mm.region->name, i * sizeof(x), x, expand32(POISON_INUSE)); - err = -EINVAL; - goto out_unmap; + return -EINVAL; } x = expand32(POISON_FREE); if (put_user(x, ux)) { pr_err("%s: Unable to write to mmap, offset:%zd\n", obj->mm.region->name, i 
* sizeof(x)); - err = -EFAULT; - goto out_unmap; + return -EFAULT; } } - if (unfaultable) { - if (err == -EFAULT) - err = 0; - } else { - obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; - err = wc_check(obj); - } -out_unmap: - vm_munmap(addr, obj->base.size); - return err; + if (unfaultable) + return 0; + + obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; + return wc_check(obj); } #define IGT_MMAP_MIGRATE_TOPDOWN (1 << 0) @@ -1176,6 +1156,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, struct drm_i915_private *i915 = placements[0]->i915; struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; + struct vm_area_struct *area; unsigned long addr; LIST_HEAD(objects); u64 offset; @@ -1207,20 +1188,30 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, goto out_put; } + mmap_read_lock(current->mm); + area = vma_lookup(current->mm, addr); + mmap_read_unlock(current->mm); + if (!area) { + pr_err("%s: Did not create a vm_area_struct for the mmap\n", + obj->mm.region->name); + err = -EINVAL; + goto out_addr; + } + if (flags & IGT_MMAP_MIGRATE_FILL) { err = igt_fill_mappable(placements[0], &objects); if (err) - goto out_put; + goto out_addr; } err = i915_gem_object_lock(obj, NULL); if (err) - goto out_put; + goto out_addr; err = i915_gem_object_pin_pages(obj); if (err) { i915_gem_object_unlock(obj); - goto out_put; + goto out_addr; } err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL, @@ -1228,7 +1219,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_is_lmem(obj), expand32(POISON_INUSE), &rq); i915_gem_object_unpin_pages(obj); - if (rq) { + if (rq && !err) { err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence, @@ -1237,7 +1228,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, } i915_gem_object_unlock(obj); if (err) - goto out_put; + goto out_addr; if (flags & IGT_MMAP_MIGRATE_EVICTABLE) igt_make_evictable(&objects); @@ -1245,16 +1236,16 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) { err = i915_gem_object_lock(obj, NULL); if (err) - goto out_put; + goto out_addr; /* - * Ensure we only simulate the gpu failuire when faulting the + * Ensure we only simulate the gpu failure when faulting the * pages. */ err = i915_gem_object_wait_moving_fence(obj, true); i915_gem_object_unlock(obj); if (err) - goto out_put; + goto out_addr; i915_ttm_migrate_set_failure_modes(true, false); } @@ -1298,6 +1289,9 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, } } +out_addr: + vm_munmap(addr, obj->base.size); + out_put: i915_gem_object_put(obj); igt_close_objects(i915, &objects); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 6c499692d61e..88b147fa5cb1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -148,7 +148,7 @@ static u32 gen4_read_clock_frequency(struct intel_uncore *uncore) * * Testing on actual hardware has shown there is no /16. 
*/ - return DIV_ROUND_CLOSEST(i9xx_fsb_freq(uncore->i915), 4) * 1000; + return DIV_ROUND_CLOSEST(intel_fsb_freq(uncore->i915), 4) * 1000; } static u32 read_clock_frequency(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index 4dc23b8d3aa2..dcd40b30a96b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -82,14 +82,15 @@ static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root) void intel_gt_debugfs_register(struct intel_gt *gt) { + struct dentry *debugfs_root = gt->i915->drm.debugfs_root; struct dentry *root; char gtname[4]; - if (!gt->i915->drm.primary->debugfs_root) + if (!debugfs_root) return; snprintf(gtname, sizeof(gtname), "gt%u", gt->info.id); - root = debugfs_create_dir(gtname, gt->i915->drm.primary->debugfs_root); + root = debugfs_create_dir(gtname, debugfs_root); if (IS_ERR(root)) return; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 0b35fdd461d4..006042e0b229 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -9,6 +9,7 @@ #include "display/intel_display.h" #include "display/intel_display_rps.h" +#include "soc/intel_dram.h" #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" @@ -276,20 +277,24 @@ static void gen5_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + unsigned int fsb_freq, mem_freq; u8 fmax, fmin, fstart; u32 rgvmodectl; int c_m, i; - if (i915->fsb_freq <= 3200000) + fsb_freq = intel_fsb_freq(i915); + mem_freq = intel_mem_freq(i915); + + if (fsb_freq <= 3200000) c_m = 0; - else if (i915->fsb_freq <= 4800000) + else if (fsb_freq <= 4800000) c_m = 1; else c_m = 2; for (i = 0; i < ARRAY_SIZE(cparams); i++) { if (cparams[i].i == c_m && - cparams[i].t == DIV_ROUND_CLOSEST(i915->mem_freq, 1000)) { + cparams[i].t == DIV_ROUND_CLOSEST(mem_freq, 1000)) { rps->ips.m = cparams[i].m; rps->ips.c = cparams[i].c; break; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b37e400f74e5..7d486dfa2fc1 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -337,12 +337,26 @@ static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisable_RenderCache_OperationalFlush:snb */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); } static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); + /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization:ivb,hsw + * WaDisable4x2SubspanOptimization isn't listed for VLV. 
+ */ + wa_masked_en(wal, + CACHE_MODE_1, + PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); } static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -634,6 +648,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = engine->i915; + /* Wa_1406697149 (WaDisableBankHangMode:icl) */ wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL); @@ -669,6 +685,15 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1406306137:icl,ehl */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); + + if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { + /* + * Disable Repacking for Compression (masked R/W access) + * before rendering compressed surfaces for display. + */ + wa_masked_en(wal, CACHE_MODE_0_GEN7, + DISABLE_REPACKING_FOR_COMPRESSION); + } } /* @@ -2306,15 +2331,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { - /* - * "Disable Repacking for Compression (masked R/W access) - * before rendering compressed surfaces for display." - */ - wa_masked_en(wal, CACHE_MODE_0_GEN7, - DISABLE_REPACKING_FOR_COMPRESSION); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -2565,18 +2581,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) RING_MODE_GEN7(RENDER_RING_BASE), GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); - /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ - wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); - - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization:ivb,hsw - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - wa_masked_en(wal, - CACHE_MODE_1, - PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); - /* * BSpec recommends 8x4 when MSAA is used, * however in practice 16x4 seems fastest. 
@@ -2643,9 +2647,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4); - /* WaDisable_RenderCache_OperationalFlush:snb */ - wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); - /* * From the Sandybridge PRM, volume 1 part 3, page 24: * "If this bit is set, STCunit will have LRA as replacement diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index f057c16410e7..4f252f704975 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -904,7 +904,7 @@ static void active_engine(struct kthread_work *work) arg->result = PTR_ERR(ce[count]); pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, arg->result); - while (--count) + while (count--) intel_context_put(ce[count]); return; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 0d5197c0824a..380a11c92d63 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -13,7 +13,7 @@ #include "intel_guc_ct.h" #include "intel_guc_print.h" -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) enum { CT_DEAD_ALIVE = 0, CT_DEAD_SETUP, @@ -144,7 +144,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) spin_lock_init(&ct->requests.lock); INIT_LIST_HEAD(&ct->requests.pending); INIT_LIST_HEAD(&ct->requests.incoming); -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func); #endif INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); @@ -373,7 +373,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) ct->enabled = true; ct->stall_time = KTIME_MAX; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) ct->dead_ct_reported = false; ct->dead_ct_reason = CT_DEAD_ALIVE; #endif @@ -1377,7 +1377,7 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct, ct->ctbs.recv.desc->tail); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) static void ct_dead_ct_worker_func(struct work_struct *w) { struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker); @@ -1386,6 +1386,9 @@ static void ct_dead_ct_worker_func(struct work_struct *w) if (ct->dead_ct_reported) return; + if (i915_error_injected()) + return; + ct->dead_ct_reported = true; guc_info(guc, "CTB is dead - reason=0x%X\n", ct->dead_ct_reason); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 2c4bb9a941be..e9a6ec4e6d38 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -97,7 +97,7 @@ struct intel_guc_ct { /** @stall_time: time of first time a CTB submission is stalled */ ktime_t stall_time; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) int dead_ct_reason; bool dead_ct_reported; struct work_struct dead_ct_worker; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index e7ccfa520df3..384d1400134d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -46,6 +46,14 @@ static void guc_prepare_xfer(struct intel_gt *gt) /* allows for 5us (in 10ns units) before GT can go to RC6 */ intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF); } + + /* + * Starting from IP 12.50 we need to 
enable the mirroring of GuC + * internal state to debug registers. This is always enabled on previous + * IPs. + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50)) + intel_uncore_rmw(uncore, GUC_SHIM_CONTROL2, 0, GUC_ENABLE_DEBUG_REG); } static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 3fd798837502..f73dab527547 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -96,6 +96,7 @@ #define GUC_GEN10_SHIM_WC_ENABLE (1<<21) #define GUC_SHIM_CONTROL2 _MMIO(0xc068) +#define GUC_ENABLE_DEBUG_REG (1<<11) #define GUC_IS_PRIVILEGED (1<<29) #define GSC_LOADS_HUC (1<<30) diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 673534f061ef..415422b5943c 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -194,9 +194,9 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; - struct drm_minor *minor = gvt->gt->i915->drm.primary; + struct dentry *debugfs_root = gvt->gt->i915->drm.debugfs_root; - if (minor->debugfs_root && gvt->debugfs_root) { + if (debugfs_root && gvt->debugfs_root) { debugfs_remove_recursive(vgpu->debugfs); vgpu->debugfs = NULL; } @@ -208,9 +208,9 @@ void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) */ void intel_gvt_debugfs_init(struct intel_gvt *gvt) { - struct drm_minor *minor = gvt->gt->i915->drm.primary; + struct dentry *debugfs_root = gvt->gt->i915->drm.debugfs_root; - gvt->debugfs_root = debugfs_create_dir("gvt", minor->debugfs_root); + gvt->debugfs_root = debugfs_create_dir("gvt", debugfs_root); debugfs_create_ulong("num_tracked_mmio", 0444, gvt->debugfs_root, &gvt->mmio.num_tracked_mmio); @@ -222,9 +222,9 @@ void intel_gvt_debugfs_init(struct intel_gvt *gvt) */ void intel_gvt_debugfs_clean(struct intel_gvt *gvt) { - struct drm_minor *minor = gvt->gt->i915->drm.primary; + struct dentry *debugfs_root = gvt->gt->i915->drm.debugfs_root; - if (minor->debugfs_root) { + if (debugfs_root) { debugfs_remove_recursive(gvt->debugfs_root); gvt->debugfs_root = NULL; } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 0dbc4e289300..6b0c1162505a 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -257,10 +257,9 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) * claimed the cache and we know that is does not match our * idx. If, and only if, the timeline is currently zero is it * worth competing to claim it atomically for ourselves (for - * only the winner of that race will cmpxchg return the old - * value of 0). + * only the winner of that race will cmpxchg succeed). 
*/ - if (!cached && !cmpxchg64(&it->timeline, 0, idx)) + if (!cached && try_cmpxchg64(&it->timeline, &cached, idx)) return it; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 967c0501e91e..23fa098c4479 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -720,26 +720,24 @@ static const struct i915_debugfs_files { {"i915_gem_drop_caches", &i915_drop_caches_fops}, }; -void i915_debugfs_register(struct drm_i915_private *dev_priv) +void i915_debugfs_register(struct drm_i915_private *i915) { - struct drm_minor *minor = dev_priv->drm.primary; + struct dentry *debugfs_root = i915->drm.debugfs_root; int i; - i915_debugfs_params(dev_priv); + i915_debugfs_params(i915); - debugfs_create_file("i915_forcewake_user", S_IRUSR, minor->debugfs_root, - to_i915(minor->dev), &i915_forcewake_fops); + debugfs_create_file("i915_forcewake_user", S_IRUSR, debugfs_root, + i915, &i915_forcewake_fops); for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) { - debugfs_create_file(i915_debugfs_files[i].name, - S_IRUGO | S_IWUSR, - minor->debugfs_root, - to_i915(minor->dev), + debugfs_create_file(i915_debugfs_files[i].name, S_IRUGO | S_IWUSR, + debugfs_root, i915, i915_debugfs_files[i].fops); } drm_debugfs_create_files(i915_debugfs_list, ARRAY_SIZE(i915_debugfs_list), - minor->debugfs_root, minor); + debugfs_root, i915->drm.primary); - i915_gpu_error_debugfs_register(dev_priv); + i915_gpu_error_debugfs_register(i915); } diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 33d2dcb0de65..89ab5eb14779 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -248,11 +248,11 @@ i915_debugfs_create_charp(const char *name, umode_t mode, /* add a subdirectory with files for each i915 param */ struct dentry *i915_debugfs_params(struct drm_i915_private *i915) { - struct drm_minor *minor = i915->drm.primary; + struct dentry *debugfs_root = i915->drm.debugfs_root; struct i915_params *params = &i915->params; struct dentry *dir; - dir = debugfs_create_dir("i915_params", minor->debugfs_root); + dir = debugfs_create_dir("i915_params", debugfs_root); if (IS_ERR(dir)) return dir; diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index c6263c6d3384..70f042ce8705 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -977,7 +977,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_power_domains_disable(display); drm_client_dev_suspend(&i915->drm, false); - if (HAS_DISPLAY(i915)) { + if (HAS_DISPLAY(display)) { drm_kms_helper_poll_disable(&i915->drm); intel_display_driver_disable_user_access(display); @@ -989,7 +989,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_irq_suspend(i915); intel_hpd_cancel_work(display); - if (HAS_DISPLAY(i915)) + if (HAS_DISPLAY(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -1060,7 +1060,7 @@ static int i915_drm_suspend(struct drm_device *dev) * properly. 
*/ intel_power_domains_disable(display); drm_client_dev_suspend(dev, false); - if (HAS_DISPLAY(dev_priv)) { + if (HAS_DISPLAY(display)) { drm_kms_helper_poll_disable(dev); intel_display_driver_disable_user_access(display); } @@ -1072,7 +1072,7 @@ static int i915_drm_suspend(struct drm_device *dev) intel_irq_suspend(dev_priv); intel_hpd_cancel_work(display); - if (HAS_DISPLAY(dev_priv)) + if (HAS_DISPLAY(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -1219,7 +1219,7 @@ static int i915_drm_resume(struct drm_device *dev) */ intel_irq_resume(dev_priv); - if (HAS_DISPLAY(dev_priv)) + if (HAS_DISPLAY(display)) drm_mode_config_reset(dev); i915_gem_resume(dev_priv); @@ -1228,14 +1228,14 @@ static int i915_drm_resume(struct drm_device *dev) intel_clock_gating_init(dev_priv); - if (HAS_DISPLAY(dev_priv)) + if (HAS_DISPLAY(display)) intel_display_driver_resume_access(display); intel_hpd_init(display); intel_display_driver_resume(display); - if (HAS_DISPLAY(dev_priv)) { + if (HAS_DISPLAY(display)) { intel_display_driver_enable_user_access(display); drm_kms_helper_poll_enable(dev); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4e4e89746aa6..2f3965feada1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -237,8 +237,6 @@ struct drm_i915_private { bool preserve_bios_swizzle; - unsigned int fsb_freq, mem_freq, is_ddr3; - unsigned int hpll_freq; unsigned int czclk_freq; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0e4b832dff84..7582ef34bf3f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -685,6 +685,74 @@ static void err_print_guc_ctb(struct drm_i915_error_state_buf *m, ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size); } +/* This list includes registers that are useful in debugging GuC hangs. 
*/ +const struct { + u32 start; + u32 count; +} guc_hw_reg_state[] = { + { 0xc0b0, 2 }, + { 0xc000, 65 }, + { 0xc140, 1 }, + { 0xc180, 16 }, + { 0xc1dc, 10 }, + { 0xc300, 79 }, + { 0xc4b4, 47 }, + { 0xc574, 1 }, + { 0xc57c, 1 }, + { 0xc584, 11 }, + { 0xc5c0, 8 }, + { 0xc5e4, 1 }, + { 0xc5ec, 103 }, + { 0xc7c0, 1 }, + { 0xc0b0, 2 } +}; + +static u32 print_range_line(struct drm_i915_error_state_buf *m, u32 start, u32 *dump, u32 count) +{ + if (count >= 8) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + start, dump[0], dump[1], dump[2], dump[3], + dump[4], dump[5], dump[6], dump[7]); + return 8; + } else if (count >= 4) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n", + start, dump[0], dump[1], dump[2], dump[3]); + return 4; + } else if (count >= 2) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x\n", start, dump[0], dump[1]); + return 2; + } + + err_printf(m, "[0x%04x] 0x%08x\n", start, dump[0]); + return 1; +} + +static void err_print_guc_hw_state(struct drm_i915_error_state_buf *m, u32 *hw_state) +{ + u32 total = 0; + int i; + + if (!hw_state) + return; + + err_printf(m, "GuC Register State:\n"); + + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) { + u32 entry = 0; + + while (entry < guc_hw_reg_state[i].count) { + u32 start = guc_hw_reg_state[i].start + entry * sizeof(u32); + u32 count = guc_hw_reg_state[i].count - entry; + u32 *values = hw_state + total + entry; + + entry += print_range_line(m, start, values, count); + } + + GEM_BUG_ON(entry != guc_hw_reg_state[i].count); + total += entry; + } +} + static void err_print_uc(struct drm_i915_error_state_buf *m, const struct intel_uc_coredump *error_uc) { @@ -693,6 +761,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp); + err_print_guc_hw_state(m, error_uc->guc.hw_state); intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log); err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence); err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0); @@ -1025,6 +1094,7 @@ static void cleanup_uc(struct intel_uc_coredump *uc) kfree(uc->huc_fw.file_wanted.path); i915_vma_coredump_free(uc->guc.vma_log); i915_vma_coredump_free(uc->guc.vma_ctb); + kfree(uc->guc.hw_state); kfree(uc); } @@ -1721,6 +1791,37 @@ static void gt_record_guc_ctb(struct intel_ctb_coredump *saved, saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr; } +static u32 read_guc_state_reg(struct intel_uncore *uncore, int range, int count) +{ + GEM_BUG_ON(range >= ARRAY_SIZE(guc_hw_reg_state)); + GEM_BUG_ON(count >= guc_hw_reg_state[range].count); + + return intel_uncore_read(uncore, + _MMIO(guc_hw_reg_state[range].start + count * sizeof(u32))); +} + +static void gt_record_guc_hw_state(struct intel_uncore *uncore, + struct intel_uc_coredump *error_uc) +{ + u32 *hw_state; + u32 count = 0; + int i, j; + + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) + count += guc_hw_reg_state[i].count; + + hw_state = kcalloc(count, sizeof(u32), ALLOW_FAIL); + if (!hw_state) + return; + + count = 0; + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) + for (j = 0; j < guc_hw_reg_state[i].count; j++) + hw_state[count++] = read_guc_state_reg(uncore, i, j); + + error_uc->guc.hw_state = hw_state; +} + static struct intel_uc_coredump * gt_record_uc(struct intel_gt_coredump *gt, struct i915_vma_compress *compress) @@ -1755,6 +1856,7 @@ gt_record_uc(struct intel_gt_coredump *gt, 
uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv, uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); + gt_record_guc_hw_state(gt->_gt->uncore, error_uc); return error_uc; } @@ -2445,11 +2547,11 @@ static const struct file_operations i915_error_state_fops = { void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) { - struct drm_minor *minor = i915->drm.primary; + struct dentry *debugfs_root = i915->drm.debugfs_root; - debugfs_create_file("i915_error_state", 0644, minor->debugfs_root, i915, + debugfs_create_file("i915_error_state", 0644, debugfs_root, i915, &i915_error_state_fops); - debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915, + debugfs_create_file("i915_gpu_info", 0644, debugfs_root, i915, &i915_gpu_info_fops); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 182324979278..91b3df621a49 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -177,6 +177,7 @@ struct intel_gt_coredump { struct intel_ctb_coredump ctb[2]; struct i915_vma_coredump *vma_ctb; struct i915_vma_coredump *vma_log; + u32 *hw_state; u32 timestamp; u16 last_fence; bool is_guc_capture; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 191ed8bb1d9c..a5fa40ab5de2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -439,7 +439,7 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) * able to process them after we restore SDEIER (as soon as we restore * it, we'll get an interrupt if SDEIIR still has something to process * due to its back queue). */ - if (!HAS_PCH_NOP(i915)) { + if (!HAS_PCH_NOP(display)) { sde_ier = raw_reg_read(regs, SDEIER); raw_reg_write(regs, SDEIER, 0); } @@ -459,7 +459,7 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) de_iir = raw_reg_read(regs, DEIIR); if (de_iir) { raw_reg_write(regs, DEIIR, de_iir); - if (DISPLAY_VER(i915) >= 7) + if (DISPLAY_VER(display) >= 7) ivb_display_irq_handler(display, de_iir); else ilk_display_irq_handler(display, de_iir); @@ -834,6 +834,7 @@ static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) static u32 i9xx_error_mask(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; /* * On gen2/3 FBC generates (seemingly spurious) * display INVALID_GTT/INVALID_GTT_PTE table errors. @@ -846,7 +847,7 @@ static u32 i9xx_error_mask(struct drm_i915_private *i915) * Unfortunately we can't mask off individual PGTBL_ER bits, * so we just have to mask off all page table errors via EMR. 
*/ - if (HAS_FBC(i915)) + if (HAS_FBC(display)) return I915_ERROR_MEMORY_REFRESH; else return I915_ERROR_PAGE_TABLE | @@ -924,12 +925,12 @@ static void i915_irq_postinstall(struct drm_i915_private *dev_priv) I915_MASTER_ERROR_INTERRUPT | I915_USER_INTERRUPT; - if (DISPLAY_VER(dev_priv) >= 3) { + if (DISPLAY_VER(display) >= 3) { dev_priv->irq_mask &= ~I915_ASLE_INTERRUPT; enable_mask |= I915_ASLE_INTERRUPT; } - if (HAS_HOTPLUG(dev_priv)) { + if (HAS_HOTPLUG(display)) { dev_priv->irq_mask &= ~I915_DISPLAY_PORT_INTERRUPT; enable_mask |= I915_DISPLAY_PORT_INTERRUPT; } @@ -963,7 +964,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) ret = IRQ_HANDLED; - if (HAS_HOTPLUG(dev_priv) && + if (HAS_HOTPLUG(display) && iir & I915_DISPLAY_PORT_INTERRUPT) hotplug_status = i9xx_hpd_irq_ack(display); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 03b895897f60..354ef75ef6a5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -412,9 +412,9 @@ #define FW_BLC _MMIO(0x20d8) #define FW_BLC2 _MMIO(0x20dc) #define FW_BLC_SELF _MMIO(0x20e0) /* 915+ only */ -#define FW_BLC_SELF_EN_MASK (1 << 31) -#define FW_BLC_SELF_FIFO_MASK (1 << 16) /* 945 only */ -#define FW_BLC_SELF_EN (1 << 15) /* 945 only */ +#define FW_BLC_SELF_EN_MASK REG_BIT(31) +#define FW_BLC_SELF_FIFO_MASK REG_BIT(16) /* 945 only */ +#define FW_BLC_SELF_EN REG_BIT(15) /* 945 only */ #define MM_BURST_LENGTH 0x00700000 #define MM_FIFO_WATERMARK 0x0001F000 #define LM_BURST_LENGTH 0x00000700 @@ -613,7 +613,8 @@ #define DSTATE_GFX_CLOCK_GATING (1 << 1) #define DSTATE_DOT_CLOCK_GATING (1 << 0) -#define DSPCLK_GATE_D(__i915) _MMIO(DISPLAY_MMIO_BASE(__i915) + 0x6200) +#define DSPCLK_GATE_D _MMIO(0x6200) +#define VLV_DSPCLK_GATE_D _MMIO(VLV_DISPLAY_BASE + 0x6200) # define DPUNIT_B_CLOCK_GATE_DISABLE (1 << 30) /* 965 */ # define VSUNIT_CLOCK_GATE_DISABLE (1 << 29) /* 965 */ # define VRHUNIT_CLOCK_GATE_DISABLE (1 << 28) /* 965 */ diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c index 4c02a04be681..3a95a55b2e87 100644 --- a/drivers/gpu/drm/i915/i915_switcheroo.c +++ b/drivers/gpu/drm/i915/i915_switcheroo.c @@ -15,13 +15,14 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) { struct drm_i915_private *i915 = pdev_to_i915(pdev); + struct intel_display *display = i915 ? i915->display : NULL; pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; if (!i915) { dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); return; } - if (!HAS_DISPLAY(i915)) { + if (!HAS_DISPLAY(display)) { dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n"); return; } @@ -44,13 +45,14 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, static bool i915_switcheroo_can_switch(struct pci_dev *pdev) { struct drm_i915_private *i915 = pdev_to_i915(pdev); + struct intel_display *display = i915 ? i915->display : NULL; /* * FIXME: open_count is protected by drm_global_mutex but that would lead to * locking inversion with the driver load path. And the access here is * completely racy anyway. So don't bother with locking for now. 
*/ - return i915 && HAS_DISPLAY(i915) && atomic_read(&i915->drm.open_count) == 0; + return i915 && HAS_DISPLAY(display) && atomic_read(&i915->drm.open_count) == 0; } static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index f7fb40cfdb70..9cb40c2c4b12 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -267,8 +267,13 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) (Wmax)) #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) -/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) +/* + * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. + * On PREEMPT_RT the context isn't becoming atomic because it is used in an + * interrupt handler or because a spinlock_t is acquired. This leads to + * warnings which don't occur otherwise and therefore the check is disabled. + */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) #else # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) diff --git a/drivers/gpu/drm/i915/intel_clock_gating.c b/drivers/gpu/drm/i915/intel_clock_gating.c index f86a3629ae9e..467740969431 100644 --- a/drivers/gpu/drm/i915/intel_clock_gating.c +++ b/drivers/gpu/drm/i915/intel_clock_gating.c @@ -132,16 +132,17 @@ static void ibx_init_clock_gating(struct drm_i915_private *i915) static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) { + struct intel_display *display = dev_priv->display; enum pipe pipe; - for_each_pipe(dev_priv, pipe) { - intel_uncore_rmw(&dev_priv->uncore, DSPCNTR(dev_priv, pipe), + for_each_pipe(display, pipe) { + intel_uncore_rmw(&dev_priv->uncore, DSPCNTR(display, pipe), 0, DISP_TRICKLE_FEED_DISABLE); - intel_uncore_rmw(&dev_priv->uncore, DSPSURF(dev_priv, pipe), + intel_uncore_rmw(&dev_priv->uncore, DSPSURF(display, pipe), 0, 0); intel_uncore_posting_read(&dev_priv->uncore, - DSPSURF(dev_priv, pipe)); + DSPSURF(display, pipe)); } } @@ -218,7 +219,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *i915) /* The below fixes the weird display corruption, a few pixels shifted * downward, on (only) LVDS of some HP laptops with IVY. */ - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { val = intel_uncore_read(&i915->uncore, TRANS_CHICKEN2(pipe)); val |= TRANS_CHICKEN2_TIMING_OVERRIDE; val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; @@ -229,7 +230,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *i915) intel_uncore_write(&i915->uncore, TRANS_CHICKEN2(pipe), val); } /* WADP0ClockGatingDisable */ - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { intel_uncore_write(&i915->uncore, TRANS_CHICKEN1(pipe), TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); } @@ -307,11 +308,13 @@ static void gen6_init_clock_gating(struct drm_i915_private *i915) static void lpt_init_clock_gating(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; + /* * TODO: this bit should only be enabled when really needed, then * disabled when not needed anymore in order to save power. 
*/ - if (HAS_PCH_LPT_LP(i915)) + if (HAS_PCH_LPT_LP(display)) intel_uncore_rmw(&i915->uncore, SOUTH_DSPCLK_GATE_D, 0, PCH_LP_PARTITION_LEVEL_DISABLE); @@ -355,7 +358,9 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915) static void cnp_init_clock_gating(struct drm_i915_private *i915) { - if (!HAS_PCH_CNP(i915)) + struct intel_display *display = i915->display; + + if (!HAS_PCH_CNP(display)) return; /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */ @@ -421,6 +426,7 @@ static void skl_init_clock_gating(struct drm_i915_private *i915) static void bdw_init_clock_gating(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; enum pipe pipe; /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ @@ -432,7 +438,7 @@ static void bdw_init_clock_gating(struct drm_i915_private *i915) /* WaPsrDPAMaskVBlankInSRD:bdw */ intel_uncore_rmw(&i915->uncore, CHICKEN_PAR1_1, 0, HSW_MASK_VBL_TO_PIPE_IN_SRD); - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ intel_uncore_rmw(&i915->uncore, CHICKEN_PIPESL_1(pipe), 0, BDW_UNMASK_VBL_TO_REGS_IN_SRD); @@ -468,6 +474,7 @@ static void bdw_init_clock_gating(struct drm_i915_private *i915) static void hsw_init_clock_gating(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; enum pipe pipe; /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ @@ -476,7 +483,7 @@ static void hsw_init_clock_gating(struct drm_i915_private *i915) /* WaPsrDPAMaskVBlankInSRD:hsw */ intel_uncore_rmw(&i915->uncore, CHICKEN_PAR1_1, 0, HSW_MASK_VBL_TO_PIPE_IN_SRD); - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { /* WaPsrDPRSUnmaskVBlankInSRD:hsw */ intel_uncore_rmw(&i915->uncore, CHICKEN_PIPESL_1(pipe), 0, HSW_UNMASK_VBL_TO_REGS_IN_SRD); @@ -494,6 +501,8 @@ static void hsw_init_clock_gating(struct drm_i915_private *i915) static void ivb_init_clock_gating(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; + intel_uncore_write(&i915->uncore, ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); /* WaFbcAsynchFlipDisableFbcQueue:ivb */ @@ -531,7 +540,7 @@ static void ivb_init_clock_gating(struct drm_i915_private *i915) intel_uncore_rmw(&i915->uncore, GEN6_MBCUNIT_SNPCR, GEN6_MBC_SNPCR_MASK, GEN6_MBC_SNPCR_MED); - if (!HAS_PCH_NOP(i915)) + if (!HAS_PCH_NOP(display)) cpt_init_clock_gating(i915); gen6_check_mch_setup(i915); @@ -611,7 +620,7 @@ static void g4x_init_clock_gating(struct drm_i915_private *i915) OVCUNIT_CLOCK_GATE_DISABLE; if (IS_GM45(i915)) dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; - intel_uncore_write(&i915->uncore, DSPCLK_GATE_D(i915), dspclk_gate); + intel_uncore_write(&i915->uncore, DSPCLK_GATE_D, dspclk_gate); g4x_disable_trickle_feed(i915); } @@ -622,7 +631,7 @@ static void i965gm_init_clock_gating(struct drm_i915_private *i915) intel_uncore_write(uncore, RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); intel_uncore_write(uncore, RENCLK_GATE_D2, 0); - intel_uncore_write(uncore, DSPCLK_GATE_D(i915), 0); + intel_uncore_write(uncore, DSPCLK_GATE_D, 0); intel_uncore_write(uncore, RAMCLK_GATE_D, 0); intel_uncore_write16(uncore, DEUC, 0); intel_uncore_write(uncore, diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c index 87ac4446d306..ca57a3dd3148 100644 --- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c +++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c @@ -62,6 +62,7 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) { struct drm_i915_private *dev_priv = 
iter->i915; + struct intel_display *display = dev_priv->display; MMIO_RING_D(RING_IMR); MMIO_D(SDEIMR); @@ -133,38 +134,38 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(_MMIO(0x650b4)); MMIO_D(_MMIO(0xc4040)); MMIO_D(DERRMR); - MMIO_D(PIPEDSL(dev_priv, PIPE_A)); - MMIO_D(PIPEDSL(dev_priv, PIPE_B)); - MMIO_D(PIPEDSL(dev_priv, PIPE_C)); - MMIO_D(PIPEDSL(dev_priv, _PIPE_EDP)); - MMIO_D(TRANSCONF(dev_priv, TRANSCODER_A)); - MMIO_D(TRANSCONF(dev_priv, TRANSCODER_B)); - MMIO_D(TRANSCONF(dev_priv, TRANSCODER_C)); - MMIO_D(TRANSCONF(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPESTAT(dev_priv, PIPE_A)); - MMIO_D(PIPESTAT(dev_priv, PIPE_B)); - MMIO_D(PIPESTAT(dev_priv, PIPE_C)); - MMIO_D(PIPESTAT(dev_priv, _PIPE_EDP)); - MMIO_D(PIPE_FLIPCOUNT_G4X(dev_priv, PIPE_A)); - MMIO_D(PIPE_FLIPCOUNT_G4X(dev_priv, PIPE_B)); - MMIO_D(PIPE_FLIPCOUNT_G4X(dev_priv, PIPE_C)); - MMIO_D(PIPE_FLIPCOUNT_G4X(dev_priv, _PIPE_EDP)); - MMIO_D(PIPE_FRMCOUNT_G4X(dev_priv, PIPE_A)); - MMIO_D(PIPE_FRMCOUNT_G4X(dev_priv, PIPE_B)); - MMIO_D(PIPE_FRMCOUNT_G4X(dev_priv, PIPE_C)); - MMIO_D(PIPE_FRMCOUNT_G4X(dev_priv, _PIPE_EDP)); - MMIO_D(CURCNTR(dev_priv, PIPE_A)); - MMIO_D(CURCNTR(dev_priv, PIPE_B)); - MMIO_D(CURCNTR(dev_priv, PIPE_C)); - MMIO_D(CURPOS(dev_priv, PIPE_A)); - MMIO_D(CURPOS(dev_priv, PIPE_B)); - MMIO_D(CURPOS(dev_priv, PIPE_C)); - MMIO_D(CURBASE(dev_priv, PIPE_A)); - MMIO_D(CURBASE(dev_priv, PIPE_B)); - MMIO_D(CURBASE(dev_priv, PIPE_C)); - MMIO_D(CUR_FBC_CTL(dev_priv, PIPE_A)); - MMIO_D(CUR_FBC_CTL(dev_priv, PIPE_B)); - MMIO_D(CUR_FBC_CTL(dev_priv, PIPE_C)); + MMIO_D(PIPEDSL(display, PIPE_A)); + MMIO_D(PIPEDSL(display, PIPE_B)); + MMIO_D(PIPEDSL(display, PIPE_C)); + MMIO_D(PIPEDSL(display, _PIPE_EDP)); + MMIO_D(TRANSCONF(display, TRANSCODER_A)); + MMIO_D(TRANSCONF(display, TRANSCODER_B)); + MMIO_D(TRANSCONF(display, TRANSCODER_C)); + MMIO_D(TRANSCONF(display, TRANSCODER_EDP)); + MMIO_D(PIPESTAT(display, PIPE_A)); + MMIO_D(PIPESTAT(display, PIPE_B)); + MMIO_D(PIPESTAT(display, PIPE_C)); + MMIO_D(PIPESTAT(display, _PIPE_EDP)); + MMIO_D(PIPE_FLIPCOUNT_G4X(display, PIPE_A)); + MMIO_D(PIPE_FLIPCOUNT_G4X(display, PIPE_B)); + MMIO_D(PIPE_FLIPCOUNT_G4X(display, PIPE_C)); + MMIO_D(PIPE_FLIPCOUNT_G4X(display, _PIPE_EDP)); + MMIO_D(PIPE_FRMCOUNT_G4X(display, PIPE_A)); + MMIO_D(PIPE_FRMCOUNT_G4X(display, PIPE_B)); + MMIO_D(PIPE_FRMCOUNT_G4X(display, PIPE_C)); + MMIO_D(PIPE_FRMCOUNT_G4X(display, _PIPE_EDP)); + MMIO_D(CURCNTR(display, PIPE_A)); + MMIO_D(CURCNTR(display, PIPE_B)); + MMIO_D(CURCNTR(display, PIPE_C)); + MMIO_D(CURPOS(display, PIPE_A)); + MMIO_D(CURPOS(display, PIPE_B)); + MMIO_D(CURPOS(display, PIPE_C)); + MMIO_D(CURBASE(display, PIPE_A)); + MMIO_D(CURBASE(display, PIPE_B)); + MMIO_D(CURBASE(display, PIPE_C)); + MMIO_D(CUR_FBC_CTL(display, PIPE_A)); + MMIO_D(CUR_FBC_CTL(display, PIPE_B)); + MMIO_D(CUR_FBC_CTL(display, PIPE_C)); MMIO_D(_MMIO(0x700ac)); MMIO_D(_MMIO(0x710ac)); MMIO_D(_MMIO(0x720ac)); @@ -172,32 +173,32 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(_MMIO(0x70094)); MMIO_D(_MMIO(0x70098)); MMIO_D(_MMIO(0x7009c)); - MMIO_D(DSPCNTR(dev_priv, PIPE_A)); - MMIO_D(DSPADDR(dev_priv, PIPE_A)); - MMIO_D(DSPSTRIDE(dev_priv, PIPE_A)); - MMIO_D(DSPPOS(dev_priv, PIPE_A)); - MMIO_D(DSPSIZE(dev_priv, PIPE_A)); - MMIO_D(DSPSURF(dev_priv, PIPE_A)); - MMIO_D(DSPOFFSET(dev_priv, PIPE_A)); - MMIO_D(DSPSURFLIVE(dev_priv, PIPE_A)); + MMIO_D(DSPCNTR(display, PIPE_A)); + MMIO_D(DSPADDR(display, PIPE_A)); + MMIO_D(DSPSTRIDE(display, PIPE_A)); + 
MMIO_D(DSPPOS(display, PIPE_A)); + MMIO_D(DSPSIZE(display, PIPE_A)); + MMIO_D(DSPSURF(display, PIPE_A)); + MMIO_D(DSPOFFSET(display, PIPE_A)); + MMIO_D(DSPSURFLIVE(display, PIPE_A)); MMIO_D(REG_50080(PIPE_A, PLANE_PRIMARY)); - MMIO_D(DSPCNTR(dev_priv, PIPE_B)); - MMIO_D(DSPADDR(dev_priv, PIPE_B)); - MMIO_D(DSPSTRIDE(dev_priv, PIPE_B)); - MMIO_D(DSPPOS(dev_priv, PIPE_B)); - MMIO_D(DSPSIZE(dev_priv, PIPE_B)); - MMIO_D(DSPSURF(dev_priv, PIPE_B)); - MMIO_D(DSPOFFSET(dev_priv, PIPE_B)); - MMIO_D(DSPSURFLIVE(dev_priv, PIPE_B)); + MMIO_D(DSPCNTR(display, PIPE_B)); + MMIO_D(DSPADDR(display, PIPE_B)); + MMIO_D(DSPSTRIDE(display, PIPE_B)); + MMIO_D(DSPPOS(display, PIPE_B)); + MMIO_D(DSPSIZE(display, PIPE_B)); + MMIO_D(DSPSURF(display, PIPE_B)); + MMIO_D(DSPOFFSET(display, PIPE_B)); + MMIO_D(DSPSURFLIVE(display, PIPE_B)); MMIO_D(REG_50080(PIPE_B, PLANE_PRIMARY)); - MMIO_D(DSPCNTR(dev_priv, PIPE_C)); - MMIO_D(DSPADDR(dev_priv, PIPE_C)); - MMIO_D(DSPSTRIDE(dev_priv, PIPE_C)); - MMIO_D(DSPPOS(dev_priv, PIPE_C)); - MMIO_D(DSPSIZE(dev_priv, PIPE_C)); - MMIO_D(DSPSURF(dev_priv, PIPE_C)); - MMIO_D(DSPOFFSET(dev_priv, PIPE_C)); - MMIO_D(DSPSURFLIVE(dev_priv, PIPE_C)); + MMIO_D(DSPCNTR(display, PIPE_C)); + MMIO_D(DSPADDR(display, PIPE_C)); + MMIO_D(DSPSTRIDE(display, PIPE_C)); + MMIO_D(DSPPOS(display, PIPE_C)); + MMIO_D(DSPSIZE(display, PIPE_C)); + MMIO_D(DSPSURF(display, PIPE_C)); + MMIO_D(DSPOFFSET(display, PIPE_C)); + MMIO_D(DSPSURFLIVE(display, PIPE_C)); MMIO_D(REG_50080(PIPE_C, PLANE_PRIMARY)); MMIO_D(SPRCTL(PIPE_A)); MMIO_D(SPRLINOFF(PIPE_A)); @@ -238,73 +239,73 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(SPRSCALE(PIPE_C)); MMIO_D(SPRSURFLIVE(PIPE_C)); MMIO_D(REG_50080(PIPE_C, PLANE_SPRITE0)); - MMIO_D(TRANS_HTOTAL(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_HBLANK(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_HSYNC(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_VTOTAL(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_VBLANK(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_VSYNC(dev_priv, TRANSCODER_A)); - MMIO_D(BCLRPAT(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_VSYNCSHIFT(dev_priv, TRANSCODER_A)); - MMIO_D(PIPESRC(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_HTOTAL(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_HBLANK(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_HSYNC(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_VTOTAL(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_VBLANK(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_VSYNC(dev_priv, TRANSCODER_B)); - MMIO_D(BCLRPAT(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_VSYNCSHIFT(dev_priv, TRANSCODER_B)); - MMIO_D(PIPESRC(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_HTOTAL(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_HBLANK(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_HSYNC(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_VTOTAL(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_VBLANK(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_VSYNC(dev_priv, TRANSCODER_C)); - MMIO_D(BCLRPAT(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_VSYNCSHIFT(dev_priv, TRANSCODER_C)); - MMIO_D(PIPESRC(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_HTOTAL(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_HBLANK(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_HSYNC(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_VTOTAL(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_VBLANK(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_VSYNC(dev_priv, TRANSCODER_EDP)); - MMIO_D(BCLRPAT(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_VSYNCSHIFT(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_DATA_M1(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_DATA_N1(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_DATA_M2(dev_priv, 
TRANSCODER_A)); - MMIO_D(PIPE_DATA_N2(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_LINK_M1(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_LINK_N1(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_LINK_M2(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_LINK_N2(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_DATA_M1(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_DATA_N1(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_DATA_M2(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_DATA_N2(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_LINK_M1(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_LINK_N1(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_LINK_M2(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_LINK_N2(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_DATA_M1(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_DATA_N1(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_DATA_M2(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_DATA_N2(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_LINK_M1(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_LINK_N1(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_LINK_M2(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_LINK_N2(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_DATA_M1(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_DATA_N1(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_DATA_M2(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_DATA_N2(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_LINK_M1(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_LINK_N1(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_LINK_M2(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_LINK_N2(dev_priv, TRANSCODER_EDP)); + MMIO_D(TRANS_HTOTAL(display, TRANSCODER_A)); + MMIO_D(TRANS_HBLANK(display, TRANSCODER_A)); + MMIO_D(TRANS_HSYNC(display, TRANSCODER_A)); + MMIO_D(TRANS_VTOTAL(display, TRANSCODER_A)); + MMIO_D(TRANS_VBLANK(display, TRANSCODER_A)); + MMIO_D(TRANS_VSYNC(display, TRANSCODER_A)); + MMIO_D(BCLRPAT(display, TRANSCODER_A)); + MMIO_D(TRANS_VSYNCSHIFT(display, TRANSCODER_A)); + MMIO_D(PIPESRC(display, TRANSCODER_A)); + MMIO_D(TRANS_HTOTAL(display, TRANSCODER_B)); + MMIO_D(TRANS_HBLANK(display, TRANSCODER_B)); + MMIO_D(TRANS_HSYNC(display, TRANSCODER_B)); + MMIO_D(TRANS_VTOTAL(display, TRANSCODER_B)); + MMIO_D(TRANS_VBLANK(display, TRANSCODER_B)); + MMIO_D(TRANS_VSYNC(display, TRANSCODER_B)); + MMIO_D(BCLRPAT(display, TRANSCODER_B)); + MMIO_D(TRANS_VSYNCSHIFT(display, TRANSCODER_B)); + MMIO_D(PIPESRC(display, TRANSCODER_B)); + MMIO_D(TRANS_HTOTAL(display, TRANSCODER_C)); + MMIO_D(TRANS_HBLANK(display, TRANSCODER_C)); + MMIO_D(TRANS_HSYNC(display, TRANSCODER_C)); + MMIO_D(TRANS_VTOTAL(display, TRANSCODER_C)); + MMIO_D(TRANS_VBLANK(display, TRANSCODER_C)); + MMIO_D(TRANS_VSYNC(display, TRANSCODER_C)); + MMIO_D(BCLRPAT(display, TRANSCODER_C)); + MMIO_D(TRANS_VSYNCSHIFT(display, TRANSCODER_C)); + MMIO_D(PIPESRC(display, TRANSCODER_C)); + MMIO_D(TRANS_HTOTAL(display, TRANSCODER_EDP)); + MMIO_D(TRANS_HBLANK(display, TRANSCODER_EDP)); + MMIO_D(TRANS_HSYNC(display, TRANSCODER_EDP)); + MMIO_D(TRANS_VTOTAL(display, TRANSCODER_EDP)); + MMIO_D(TRANS_VBLANK(display, TRANSCODER_EDP)); + MMIO_D(TRANS_VSYNC(display, TRANSCODER_EDP)); + MMIO_D(BCLRPAT(display, TRANSCODER_EDP)); + MMIO_D(TRANS_VSYNCSHIFT(display, TRANSCODER_EDP)); + MMIO_D(PIPE_DATA_M1(display, TRANSCODER_A)); + MMIO_D(PIPE_DATA_N1(display, TRANSCODER_A)); + MMIO_D(PIPE_DATA_M2(display, TRANSCODER_A)); + MMIO_D(PIPE_DATA_N2(display, TRANSCODER_A)); + MMIO_D(PIPE_LINK_M1(display, TRANSCODER_A)); + MMIO_D(PIPE_LINK_N1(display, TRANSCODER_A)); + MMIO_D(PIPE_LINK_M2(display, TRANSCODER_A)); + MMIO_D(PIPE_LINK_N2(display, TRANSCODER_A)); + MMIO_D(PIPE_DATA_M1(display, TRANSCODER_B)); + MMIO_D(PIPE_DATA_N1(display, TRANSCODER_B)); + 
MMIO_D(PIPE_DATA_M2(display, TRANSCODER_B)); + MMIO_D(PIPE_DATA_N2(display, TRANSCODER_B)); + MMIO_D(PIPE_LINK_M1(display, TRANSCODER_B)); + MMIO_D(PIPE_LINK_N1(display, TRANSCODER_B)); + MMIO_D(PIPE_LINK_M2(display, TRANSCODER_B)); + MMIO_D(PIPE_LINK_N2(display, TRANSCODER_B)); + MMIO_D(PIPE_DATA_M1(display, TRANSCODER_C)); + MMIO_D(PIPE_DATA_N1(display, TRANSCODER_C)); + MMIO_D(PIPE_DATA_M2(display, TRANSCODER_C)); + MMIO_D(PIPE_DATA_N2(display, TRANSCODER_C)); + MMIO_D(PIPE_LINK_M1(display, TRANSCODER_C)); + MMIO_D(PIPE_LINK_N1(display, TRANSCODER_C)); + MMIO_D(PIPE_LINK_M2(display, TRANSCODER_C)); + MMIO_D(PIPE_LINK_N2(display, TRANSCODER_C)); + MMIO_D(PIPE_DATA_M1(display, TRANSCODER_EDP)); + MMIO_D(PIPE_DATA_N1(display, TRANSCODER_EDP)); + MMIO_D(PIPE_DATA_M2(display, TRANSCODER_EDP)); + MMIO_D(PIPE_DATA_N2(display, TRANSCODER_EDP)); + MMIO_D(PIPE_LINK_M1(display, TRANSCODER_EDP)); + MMIO_D(PIPE_LINK_N1(display, TRANSCODER_EDP)); + MMIO_D(PIPE_LINK_M2(display, TRANSCODER_EDP)); + MMIO_D(PIPE_LINK_N2(display, TRANSCODER_EDP)); MMIO_D(PF_CTL(PIPE_A)); MMIO_D(PF_WIN_SZ(PIPE_A)); MMIO_D(PF_WIN_POS(PIPE_A)); @@ -513,12 +514,12 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(GAMMA_MODE(PIPE_A)); MMIO_D(GAMMA_MODE(PIPE_B)); MMIO_D(GAMMA_MODE(PIPE_C)); - MMIO_D(TRANS_MULT(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_MULT(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_MULT(dev_priv, TRANSCODER_C)); - MMIO_D(HSW_TVIDEO_DIP_CTL(dev_priv, TRANSCODER_A)); - MMIO_D(HSW_TVIDEO_DIP_CTL(dev_priv, TRANSCODER_B)); - MMIO_D(HSW_TVIDEO_DIP_CTL(dev_priv, TRANSCODER_C)); + MMIO_D(TRANS_MULT(display, TRANSCODER_A)); + MMIO_D(TRANS_MULT(display, TRANSCODER_B)); + MMIO_D(TRANS_MULT(display, TRANSCODER_C)); + MMIO_D(HSW_TVIDEO_DIP_CTL(display, TRANSCODER_A)); + MMIO_D(HSW_TVIDEO_DIP_CTL(display, TRANSCODER_B)); + MMIO_D(HSW_TVIDEO_DIP_CTL(display, TRANSCODER_C)); MMIO_D(SFUSE_STRAP); MMIO_D(SBI_ADDR); MMIO_D(SBI_DATA); @@ -1111,6 +1112,7 @@ static int iterate_skl_plus_mmio(struct intel_gvt_mmio_table_iter *iter) static int iterate_bxt_mmio(struct intel_gvt_mmio_table_iter *iter) { struct drm_i915_private *dev_priv = iter->i915; + struct intel_display *display = dev_priv->display; MMIO_F(_MMIO(0x80000), 0x3000); MMIO_D(GEN7_SAMPLER_INSTDONE); @@ -1242,9 +1244,9 @@ static int iterate_bxt_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(BXT_DSI_PLL_ENABLE); MMIO_D(GEN9_CLKGATE_DIS_0); MMIO_D(GEN9_CLKGATE_DIS_4); - MMIO_D(HSW_TVIDEO_DIP_GCP(dev_priv, TRANSCODER_A)); - MMIO_D(HSW_TVIDEO_DIP_GCP(dev_priv, TRANSCODER_B)); - MMIO_D(HSW_TVIDEO_DIP_GCP(dev_priv, TRANSCODER_C)); + MMIO_D(HSW_TVIDEO_DIP_GCP(display, TRANSCODER_A)); + MMIO_D(HSW_TVIDEO_DIP_GCP(display, TRANSCODER_B)); + MMIO_D(HSW_TVIDEO_DIP_GCP(display, TRANSCODER_C)); MMIO_D(RC6_CTX_BASE); MMIO_D(GEN8_PUSHBUS_CONTROL); MMIO_D(GEN8_PUSHBUS_ENABLE); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c8e29fd72290..4ccba7c8ffb3 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2502,6 +2502,7 @@ static int sanity_check_mmio_access(struct intel_uncore *uncore) int intel_uncore_init_mmio(struct intel_uncore *uncore) { struct drm_i915_private *i915 = uncore->i915; + struct intel_display *display = i915->display; int ret; ret = sanity_check_mmio_access(uncore); @@ -2536,7 +2537,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.read_fw_domains); 
GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.write_fw_domains); - if (HAS_FPGA_DBG_UNCLAIMED(i915)) + if (HAS_FPGA_DBG_UNCLAIMED(display)) uncore->flags |= UNCORE_HAS_FPGA_DBG_UNCLAIMED; if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c index e07c5b380789..545f79eb0cc5 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c @@ -69,17 +69,17 @@ DEFINE_SIMPLE_ATTRIBUTE(pxp_terminate_fops, pxp_terminate_get, pxp_terminate_set void intel_pxp_debugfs_register(struct intel_pxp *pxp) { - struct drm_minor *minor; + struct dentry *debugfs_root; struct dentry *pxproot; if (!intel_pxp_is_supported(pxp)) return; - minor = pxp->ctrl_gt->i915->drm.primary; - if (!minor->debugfs_root) + debugfs_root = pxp->ctrl_gt->i915->drm.debugfs_root; + if (!debugfs_root) return; - pxproot = debugfs_create_dir("pxp", minor->debugfs_root); + pxproot = debugfs_create_dir("pxp", debugfs_root); if (IS_ERR(pxproot)) return; diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 41eaa9b7f67d..58bcbdcef563 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -277,13 +277,15 @@ static int live_forcewake_domains(void *arg) #define FW_RANGE 0x40000 struct intel_gt *gt = arg; struct intel_uncore *uncore = gt->uncore; + struct drm_i915_private *i915 = gt->i915; + struct intel_display *display = i915->display; unsigned long *valid; u32 offset; int err; - if (!HAS_FPGA_DBG_UNCLAIMED(gt->i915) && - !IS_VALLEYVIEW(gt->i915) && - !IS_CHERRYVIEW(gt->i915)) + if (!HAS_FPGA_DBG_UNCLAIMED(display) && + !IS_VALLEYVIEW(i915) && + !IS_CHERRYVIEW(i915)) return 0; /* diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c index deb159548a09..149527827624 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.c +++ b/drivers/gpu/drm/i915/soc/intel_dram.c @@ -11,6 +11,7 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_dram.h" #include "intel_mchbar_regs.h" #include "intel_pcode.h" @@ -30,10 +31,11 @@ struct dram_channel_info { #define DRAM_TYPE_STR(type) [INTEL_DRAM_ ## type] = #type -static const char *intel_dram_type_str(enum intel_dram_type type) +const char *intel_dram_type_str(enum intel_dram_type type) { static const char * const str[] = { DRAM_TYPE_STR(UNKNOWN), + DRAM_TYPE_STR(DDR2), DRAM_TYPE_STR(DDR3), DRAM_TYPE_STR(DDR4), DRAM_TYPE_STR(LPDDR3), @@ -54,9 +56,10 @@ static const char *intel_dram_type_str(enum intel_dram_type type) #undef DRAM_TYPE_STR -static bool pnv_is_ddr3(struct drm_i915_private *i915) +static enum intel_dram_type pnv_dram_type(struct drm_i915_private *i915) { - return intel_uncore_read(&i915->uncore, CSHRDDR3CTL) & CSHRDDR3CTL_DDR3; + return intel_uncore_read(&i915->uncore, CSHRDDR3CTL) & CSHRDDR3CTL_DDR3 ? 
+ INTEL_DRAM_DDR3 : INTEL_DRAM_DDR2; } static unsigned int pnv_mem_freq(struct drm_i915_private *dev_priv) @@ -135,25 +138,21 @@ static unsigned int vlv_mem_freq(struct drm_i915_private *i915) return 0; } -static void detect_mem_freq(struct drm_i915_private *i915) +unsigned int intel_mem_freq(struct drm_i915_private *i915) { if (IS_PINEVIEW(i915)) - i915->mem_freq = pnv_mem_freq(i915); + return pnv_mem_freq(i915); else if (GRAPHICS_VER(i915) == 5) - i915->mem_freq = ilk_mem_freq(i915); + return ilk_mem_freq(i915); else if (IS_CHERRYVIEW(i915)) - i915->mem_freq = chv_mem_freq(i915); + return chv_mem_freq(i915); else if (IS_VALLEYVIEW(i915)) - i915->mem_freq = vlv_mem_freq(i915); - - if (IS_PINEVIEW(i915)) - i915->is_ddr3 = pnv_is_ddr3(i915); - - if (i915->mem_freq) - drm_dbg(&i915->drm, "DDR speed: %d kHz\n", i915->mem_freq); + return vlv_mem_freq(i915); + else + return 0; } -unsigned int i9xx_fsb_freq(struct drm_i915_private *i915) +static unsigned int i9xx_fsb_freq(struct drm_i915_private *i915) { u32 fsb; @@ -235,15 +234,30 @@ static unsigned int ilk_fsb_freq(struct drm_i915_private *dev_priv) } } -static void detect_fsb_freq(struct drm_i915_private *i915) +unsigned int intel_fsb_freq(struct drm_i915_private *i915) { if (GRAPHICS_VER(i915) == 5) - i915->fsb_freq = ilk_fsb_freq(i915); + return ilk_fsb_freq(i915); else if (GRAPHICS_VER(i915) == 3 || GRAPHICS_VER(i915) == 4) - i915->fsb_freq = i9xx_fsb_freq(i915); + return i9xx_fsb_freq(i915); + else + return 0; +} - if (i915->fsb_freq) - drm_dbg(&i915->drm, "FSB frequency: %d kHz\n", i915->fsb_freq); +static int i915_get_dram_info(struct drm_i915_private *i915, struct dram_info *dram_info) +{ + dram_info->fsb_freq = intel_fsb_freq(i915); + if (dram_info->fsb_freq) + drm_dbg(&i915->drm, "FSB frequency: %d kHz\n", dram_info->fsb_freq); + + dram_info->mem_freq = intel_mem_freq(i915); + if (dram_info->mem_freq) + drm_dbg(&i915->drm, "DDR speed: %d kHz\n", dram_info->mem_freq); + + if (IS_PINEVIEW(i915)) + dram_info->type = pnv_dram_type(i915); + + return 0; } static int intel_dimm_num_devices(const struct dram_dimm_info *dimm) @@ -392,6 +406,9 @@ skl_dram_get_channels_info(struct drm_i915_private *i915, struct dram_info *dram u32 val; int ret; + /* Assume 16Gb DIMMs are present until proven otherwise */ + dram_info->has_16gb_dimms = true; + val = intel_uncore_read(&i915->uncore, SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN); ret = skl_dram_get_channel_info(i915, &ch0, 0, val); @@ -414,13 +431,16 @@ skl_dram_get_channels_info(struct drm_i915_private *i915, struct dram_info *dram return -EINVAL; } - dram_info->wm_lv_0_adjust_needed = ch0.is_16gb_dimm || ch1.is_16gb_dimm; + dram_info->has_16gb_dimms = ch0.is_16gb_dimm || ch1.is_16gb_dimm; dram_info->symmetric_memory = intel_is_dram_symmetric(&ch0, &ch1); drm_dbg_kms(&i915->drm, "Memory configuration is symmetric? 
%s\n", str_yes_no(dram_info->symmetric_memory)); + drm_dbg_kms(&i915->drm, "16Gb DIMMs: %s\n", + str_yes_no(dram_info->has_16gb_dimms)); + return 0; } @@ -649,8 +669,9 @@ static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv, static int gen11_get_dram_info(struct drm_i915_private *i915, struct dram_info *dram_info) { - int ret = skl_get_dram_info(i915, dram_info); + int ret; + ret = skl_dram_get_channels_info(i915, dram_info); if (ret) return ret; @@ -659,8 +680,6 @@ static int gen11_get_dram_info(struct drm_i915_private *i915, struct dram_info * static int gen12_get_dram_info(struct drm_i915_private *i915, struct dram_info *dram_info) { - dram_info->wm_lv_0_adjust_needed = false; - return icl_pcode_read_mem_global_info(i915, dram_info); } @@ -709,13 +728,11 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915, struct dram_info int intel_dram_detect(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; struct dram_info *dram_info; int ret; - detect_fsb_freq(i915); - detect_mem_freq(i915); - - if (GRAPHICS_VER(i915) < 9 || IS_DG2(i915) || !HAS_DISPLAY(i915)) + if (IS_DG2(i915) || !HAS_DISPLAY(display)) return 0; dram_info = drmm_kzalloc(&i915->drm, sizeof(*dram_info), GFP_KERNEL); @@ -724,13 +741,7 @@ int intel_dram_detect(struct drm_i915_private *i915) i915->dram_info = dram_info; - /* - * Assume level 0 watermark latency adjustment is needed until proven - * otherwise, this w/a is not needed by bxt/glk. - */ - dram_info->wm_lv_0_adjust_needed = !IS_BROXTON(i915) && !IS_GEMINILAKE(i915); - - if (DISPLAY_VER(i915) >= 14) + if (DISPLAY_VER(display) >= 14) ret = xelpdp_get_dram_info(i915, dram_info); else if (GRAPHICS_VER(i915) >= 12) ret = gen12_get_dram_info(i915, dram_info); @@ -738,23 +749,23 @@ int intel_dram_detect(struct drm_i915_private *i915) ret = gen11_get_dram_info(i915, dram_info); else if (IS_BROXTON(i915) || IS_GEMINILAKE(i915)) ret = bxt_get_dram_info(i915, dram_info); - else + else if (GRAPHICS_VER(i915) >= 9) ret = skl_get_dram_info(i915, dram_info); + else + ret = i915_get_dram_info(i915, dram_info); drm_dbg_kms(&i915->drm, "DRAM type: %s\n", intel_dram_type_str(dram_info->type)); + drm_dbg_kms(&i915->drm, "DRAM channels: %u\n", dram_info->num_channels); + + drm_dbg_kms(&i915->drm, "Num QGV points %u\n", dram_info->num_qgv_points); + drm_dbg_kms(&i915->drm, "Num PSF GV points %u\n", dram_info->num_psf_gv_points); + /* TODO: Do we want to abort probe on dram detection failures? 
*/ if (ret) return 0; - drm_dbg_kms(&i915->drm, "Num qgv points %u\n", dram_info->num_qgv_points); - - drm_dbg_kms(&i915->drm, "DRAM channels: %u\n", dram_info->num_channels); - - drm_dbg_kms(&i915->drm, "Watermark level 0 adjustment needed: %s\n", - str_yes_no(dram_info->wm_lv_0_adjust_needed)); - return 0; } diff --git a/drivers/gpu/drm/i915/soc/intel_dram.h b/drivers/gpu/drm/i915/soc/intel_dram.h index 2a696e03aad4..03a973f1c941 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.h +++ b/drivers/gpu/drm/i915/soc/intel_dram.h @@ -12,11 +12,9 @@ struct drm_i915_private; struct drm_device; struct dram_info { - bool wm_lv_0_adjust_needed; - u8 num_channels; - bool symmetric_memory; enum intel_dram_type { INTEL_DRAM_UNKNOWN, + INTEL_DRAM_DDR2, INTEL_DRAM_DDR3, INTEL_DRAM_DDR4, INTEL_DRAM_LPDDR3, @@ -27,13 +25,20 @@ struct dram_info { INTEL_DRAM_GDDR_ECC, __INTEL_DRAM_TYPE_MAX, } type; + unsigned int fsb_freq; + unsigned int mem_freq; + u8 num_channels; u8 num_qgv_points; u8 num_psf_gv_points; + bool symmetric_memory; + bool has_16gb_dimms; }; void intel_dram_edram_detect(struct drm_i915_private *i915); int intel_dram_detect(struct drm_i915_private *i915); -unsigned int i9xx_fsb_freq(struct drm_i915_private *i915); +unsigned int intel_fsb_freq(struct drm_i915_private *i915); +unsigned int intel_mem_freq(struct drm_i915_private *i915); const struct dram_info *intel_dram_info(struct drm_device *drm); +const char *intel_dram_type_str(enum intel_dram_type type); #endif /* __INTEL_DRAM_H__ */ diff --git a/drivers/gpu/drm/i915/soc/intel_gmch.c b/drivers/gpu/drm/i915/soc/intel_gmch.c index 5346b8dda79a..f210c9655b53 100644 --- a/drivers/gpu/drm/i915/soc/intel_gmch.c +++ b/drivers/gpu/drm/i915/soc/intel_gmch.c @@ -148,7 +148,8 @@ void intel_gmch_bar_teardown(struct drm_i915_private *i915) int intel_gmch_vga_set_state(struct drm_i915_private *i915, bool enable_decode) { - unsigned int reg = DISPLAY_VER(i915) >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL; + struct intel_display *display = i915->display; + unsigned int reg = DISPLAY_VER(display) >= 6 ? 
SNB_GMCH_CTRL : INTEL_GMCH_CTRL; u16 gmch_ctrl; if (pci_read_config_word(i915->gmch.pdev, reg, &gmch_ctrl)) { diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig index 3bfa2ac212dc..682dd2633d0c 100644 --- a/drivers/gpu/drm/imagination/Kconfig +++ b/drivers/gpu/drm/imagination/Kconfig @@ -3,8 +3,9 @@ config DRM_POWERVR tristate "Imagination Technologies PowerVR (Series 6 and later) & IMG Graphics" - depends on ARM64 + depends on (ARM64 || RISCV && 64BIT) depends on DRM + depends on MMU depends on PM select DRM_EXEC select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c index 8b9ba4983c4c..294b6019b415 100644 --- a/drivers/gpu/drm/imagination/pvr_device.c +++ b/drivers/gpu/drm/imagination/pvr_device.c @@ -23,6 +23,7 @@ #include <linux/firmware.h> #include <linux/gfp.h> #include <linux/interrupt.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/reset.h> @@ -121,21 +122,6 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev) return 0; } -static int pvr_device_reset_init(struct pvr_device *pvr_dev) -{ - struct drm_device *drm_dev = from_pvr_device(pvr_dev); - struct reset_control *reset; - - reset = devm_reset_control_get_optional_exclusive(drm_dev->dev, NULL); - if (IS_ERR(reset)) - return dev_err_probe(drm_dev->dev, PTR_ERR(reset), - "failed to get gpu reset line\n"); - - pvr_dev->reset = reset; - - return 0; -} - /** * pvr_device_process_active_queues() - Process all queue related events. * @pvr_dev: PowerVR device to check @@ -618,6 +604,9 @@ pvr_device_init(struct pvr_device *pvr_dev) struct device *dev = drm_dev->dev; int err; + /* Get the platform-specific data based on the compatible string. */ + pvr_dev->device_data = of_device_get_match_data(dev); + /* * Setup device parameters. We do this first in case other steps * depend on them. @@ -631,8 +620,7 @@ pvr_device_init(struct pvr_device *pvr_dev) if (err) return err; - /* Get the reset line for the GPU */ - err = pvr_device_reset_init(pvr_dev); + err = pvr_dev->device_data->pwr_ops->init(pvr_dev); if (err) return err; diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h index 7cb01c38d2a9..ab8f56ae15df 100644 --- a/drivers/gpu/drm/imagination/pvr_device.h +++ b/drivers/gpu/drm/imagination/pvr_device.h @@ -37,6 +37,9 @@ struct clk; /* Forward declaration from <linux/firmware.h>. */ struct firmware; +/* Forward declaration from <linux/pwrseq/consumer.h> */ +struct pwrseq_desc; + /** * struct pvr_gpu_id - Hardware GPU ID information for a PowerVR device * @b: Branch ID. @@ -58,6 +61,14 @@ struct pvr_fw_version { }; /** + * struct pvr_device_data - Platform specific data associated with a compatible string. + * @pwr_ops: Pointer to a structure with platform-specific power functions. + */ +struct pvr_device_data { + const struct pvr_power_sequence_ops *pwr_ops; +}; + +/** * struct pvr_device - powervr-specific wrapper for &struct drm_device */ struct pvr_device { @@ -98,6 +109,9 @@ struct pvr_device { /** @fw_version: Firmware version detected at runtime. */ struct pvr_fw_version fw_version; + /** @device_data: Pointer to platform-specific data. */ + const struct pvr_device_data *device_data; + /** @regs_resource: Resource representing device control registers. */ struct resource *regs_resource; @@ -148,6 +162,9 @@ struct pvr_device { */ struct reset_control *reset; + /** @pwrseq: Pointer to a power sequencer, if one is used. 
*/ + struct pwrseq_desc *pwrseq; + /** @irq: IRQ number. */ int irq; diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c index b058ec183bb3..916b40ced7eb 100644 --- a/drivers/gpu/drm/imagination/pvr_drv.c +++ b/drivers/gpu/drm/imagination/pvr_drv.c @@ -1480,15 +1480,33 @@ static void pvr_remove(struct platform_device *plat_dev) pvr_power_domains_fini(pvr_dev); } +static const struct pvr_device_data pvr_device_data_manual = { + .pwr_ops = &pvr_power_sequence_ops_manual, +}; + +static const struct pvr_device_data pvr_device_data_pwrseq = { + .pwr_ops = &pvr_power_sequence_ops_pwrseq, +}; + static const struct of_device_id dt_match[] = { - { .compatible = "img,img-rogue", .data = NULL }, + { + .compatible = "thead,th1520-gpu", + .data = &pvr_device_data_pwrseq, + }, + { + .compatible = "img,img-rogue", + .data = &pvr_device_data_manual, + }, /* * This legacy compatible string was introduced early on before the more generic * "img,img-rogue" was added. Keep it around here for compatibility, but never use * "img,img-axe" in new devicetrees. */ - { .compatible = "img,img-axe", .data = NULL }, + { + .compatible = "img,img-axe", + .data = &pvr_device_data_manual, + }, {} }; MODULE_DEVICE_TABLE(of, dt_match); @@ -1513,4 +1531,5 @@ MODULE_DESCRIPTION(PVR_DRIVER_DESC); MODULE_LICENSE("Dual MIT/GPL"); MODULE_IMPORT_NS("DMA_BUF"); MODULE_FIRMWARE("powervr/rogue_33.15.11.3_v1.fw"); +MODULE_FIRMWARE("powervr/rogue_36.52.104.182_v1.fw"); MODULE_FIRMWARE("powervr/rogue_36.53.104.796_v1.fw"); diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c index 187a07e0bd9a..c6e7ff9e935d 100644 --- a/drivers/gpu/drm/imagination/pvr_power.c +++ b/drivers/gpu/drm/imagination/pvr_power.c @@ -18,6 +18,7 @@ #include <linux/platform_device.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> +#include <linux/pwrseq/consumer.h> #include <linux/reset.h> #include <linux/timer.h> #include <linux/types.h> @@ -234,6 +235,118 @@ pvr_watchdog_init(struct pvr_device *pvr_dev) return 0; } +static int pvr_power_init_manual(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct reset_control *reset; + + reset = devm_reset_control_get_optional_exclusive(drm_dev->dev, NULL); + if (IS_ERR(reset)) + return dev_err_probe(drm_dev->dev, PTR_ERR(reset), + "failed to get gpu reset line\n"); + + pvr_dev->reset = reset; + + return 0; +} + +static int pvr_power_on_sequence_manual(struct pvr_device *pvr_dev) +{ + int err; + + err = clk_prepare_enable(pvr_dev->core_clk); + if (err) + return err; + + err = clk_prepare_enable(pvr_dev->sys_clk); + if (err) + goto err_core_clk_disable; + + err = clk_prepare_enable(pvr_dev->mem_clk); + if (err) + goto err_sys_clk_disable; + + /* + * According to the hardware manual, a delay of at least 32 clock + * cycles is required between de-asserting the clkgen reset and + * de-asserting the GPU reset. Assuming a worst-case scenario with + * a very high GPU clock frequency, a delay of 1 microsecond is + * sufficient to ensure this requirement is met across all + * feasible GPU clock speeds. 
+ */ + udelay(1); + + err = reset_control_deassert(pvr_dev->reset); + if (err) + goto err_mem_clk_disable; + + return 0; + +err_mem_clk_disable: + clk_disable_unprepare(pvr_dev->mem_clk); + +err_sys_clk_disable: + clk_disable_unprepare(pvr_dev->sys_clk); + +err_core_clk_disable: + clk_disable_unprepare(pvr_dev->core_clk); + + return err; +} + +static int pvr_power_off_sequence_manual(struct pvr_device *pvr_dev) +{ + int err; + + err = reset_control_assert(pvr_dev->reset); + + clk_disable_unprepare(pvr_dev->mem_clk); + clk_disable_unprepare(pvr_dev->sys_clk); + clk_disable_unprepare(pvr_dev->core_clk); + + return err; +} + +const struct pvr_power_sequence_ops pvr_power_sequence_ops_manual = { + .init = pvr_power_init_manual, + .power_on = pvr_power_on_sequence_manual, + .power_off = pvr_power_off_sequence_manual, +}; + +static int pvr_power_init_pwrseq(struct pvr_device *pvr_dev) +{ + struct device *dev = from_pvr_device(pvr_dev)->dev; + + pvr_dev->pwrseq = devm_pwrseq_get(dev, "gpu-power"); + if (IS_ERR(pvr_dev->pwrseq)) { + /* + * This platform requires a sequencer. If we can't get it, we + * must return the error (including -EPROBE_DEFER to wait for + * the provider to appear) + */ + return dev_err_probe(dev, PTR_ERR(pvr_dev->pwrseq), + "Failed to get required power sequencer\n"); + } + + return 0; +} + +static int pvr_power_on_sequence_pwrseq(struct pvr_device *pvr_dev) +{ + return pwrseq_power_on(pvr_dev->pwrseq); +} + +static int pvr_power_off_sequence_pwrseq(struct pvr_device *pvr_dev) +{ + return pwrseq_power_off(pvr_dev->pwrseq); +} + +const struct pvr_power_sequence_ops pvr_power_sequence_ops_pwrseq = { + .init = pvr_power_init_pwrseq, + .power_on = pvr_power_on_sequence_pwrseq, + .power_off = pvr_power_off_sequence_pwrseq, +}; + int pvr_power_device_suspend(struct device *dev) { @@ -252,11 +365,7 @@ pvr_power_device_suspend(struct device *dev) goto err_drm_dev_exit; } - clk_disable_unprepare(pvr_dev->mem_clk); - clk_disable_unprepare(pvr_dev->sys_clk); - clk_disable_unprepare(pvr_dev->core_clk); - - err = reset_control_assert(pvr_dev->reset); + err = pvr_dev->device_data->pwr_ops->power_off(pvr_dev); err_drm_dev_exit: drm_dev_exit(idx); @@ -276,53 +385,22 @@ pvr_power_device_resume(struct device *dev) if (!drm_dev_enter(drm_dev, &idx)) return -EIO; - err = clk_prepare_enable(pvr_dev->core_clk); + err = pvr_dev->device_data->pwr_ops->power_on(pvr_dev); if (err) goto err_drm_dev_exit; - err = clk_prepare_enable(pvr_dev->sys_clk); - if (err) - goto err_core_clk_disable; - - err = clk_prepare_enable(pvr_dev->mem_clk); - if (err) - goto err_sys_clk_disable; - - /* - * According to the hardware manual, a delay of at least 32 clock - * cycles is required between de-asserting the clkgen reset and - * de-asserting the GPU reset. Assuming a worst-case scenario with - * a very high GPU clock frequency, a delay of 1 microsecond is - * sufficient to ensure this requirement is met across all - * feasible GPU clock speeds. 
- */ - udelay(1); - - err = reset_control_deassert(pvr_dev->reset); - if (err) - goto err_mem_clk_disable; - if (pvr_dev->fw_dev.booted) { err = pvr_power_fw_enable(pvr_dev); if (err) - goto err_reset_assert; + goto err_power_off; } drm_dev_exit(idx); return 0; -err_reset_assert: - reset_control_assert(pvr_dev->reset); - -err_mem_clk_disable: - clk_disable_unprepare(pvr_dev->mem_clk); - -err_sys_clk_disable: - clk_disable_unprepare(pvr_dev->sys_clk); - -err_core_clk_disable: - clk_disable_unprepare(pvr_dev->core_clk); +err_power_off: + pvr_dev->device_data->pwr_ops->power_off(pvr_dev); err_drm_dev_exit: drm_dev_exit(idx); diff --git a/drivers/gpu/drm/imagination/pvr_power.h b/drivers/gpu/drm/imagination/pvr_power.h index ada85674a7ca..b853d092242c 100644 --- a/drivers/gpu/drm/imagination/pvr_power.h +++ b/drivers/gpu/drm/imagination/pvr_power.h @@ -41,4 +41,19 @@ pvr_power_put(struct pvr_device *pvr_dev) int pvr_power_domains_init(struct pvr_device *pvr_dev); void pvr_power_domains_fini(struct pvr_device *pvr_dev); +/** + * struct pvr_power_sequence_ops - Platform specific power sequence operations. + * @init: Pointer to the platform-specific initialization function. + * @power_on: Pointer to the platform-specific power on function. + * @power_off: Pointer to the platform-specific power off function. + */ +struct pvr_power_sequence_ops { + int (*init)(struct pvr_device *pvr_dev); + int (*power_on)(struct pvr_device *pvr_dev); + int (*power_off)(struct pvr_device *pvr_dev); +}; + +extern const struct pvr_power_sequence_ops pvr_power_sequence_ops_manual; +extern const struct pvr_power_sequence_ops pvr_power_sequence_ops_pwrseq; + #endif /* PVR_POWER_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index d5e6bab36414..eb5537f0ac90 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -394,10 +394,12 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) continue; drm_dev = device_find_child(&pdev->dev, NULL, mtk_drm_match); + put_device(&pdev->dev); if (!drm_dev) continue; temp_drm_priv = dev_get_drvdata(drm_dev); + put_device(drm_dev); if (!temp_drm_priv) continue; diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index d7726091819c..0e2bcd5f67b7 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1002,6 +1002,12 @@ static int mtk_dsi_host_attach(struct mipi_dsi_host *host, return PTR_ERR(dsi->next_bridge); } + /* + * set flag to request the DSI host bridge be pre-enabled before device bridge + * in the chain, so the DSI host is ready when the device bridge is pre-enabled + */ + dsi->next_bridge->pre_enable_prev_first = true; + drm_bridge_add(&dsi->bridge); ret = component_add(host->dev, &mtk_dsi_component_ops); diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 845fd8aa43c3..b766dd5e6c8d 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -182,8 +182,8 @@ static inline struct mtk_hdmi *hdmi_ctx_from_bridge(struct drm_bridge *b) static void mtk_hdmi_hw_vid_black(struct mtk_hdmi *hdmi, bool black) { - regmap_update_bits(hdmi->regs, VIDEO_SOURCE_SEL, - VIDEO_CFG_4, black ? GEN_RGB : NORMAL_PATH); + regmap_update_bits(hdmi->regs, VIDEO_CFG_4, + VIDEO_SOURCE_SEL, black ? 
GEN_RGB : NORMAL_PATH); } static void mtk_hdmi_hw_make_reg_writable(struct mtk_hdmi *hdmi, bool enable) @@ -310,8 +310,8 @@ static void mtk_hdmi_hw_send_info_frame(struct mtk_hdmi *hdmi, u8 *buffer, static void mtk_hdmi_hw_send_aud_packet(struct mtk_hdmi *hdmi, bool enable) { - regmap_update_bits(hdmi->regs, AUDIO_PACKET_OFF, - GRL_SHIFT_R2, enable ? 0 : AUDIO_PACKET_OFF); + regmap_update_bits(hdmi->regs, GRL_SHIFT_R2, + AUDIO_PACKET_OFF, enable ? 0 : AUDIO_PACKET_OFF); } static void mtk_hdmi_hw_config_sys(struct mtk_hdmi *hdmi) diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c index cbc4f37da8ba..02349bd44001 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_plane.c @@ -292,7 +292,8 @@ static void mtk_plane_atomic_disable(struct drm_plane *plane, wmb(); /* Make sure the above parameter is set before update */ mtk_plane_state->pending.dirty = true; - mtk_crtc_plane_disable(old_state->crtc, plane); + if (old_state && old_state->crtc) + mtk_crtc_plane_disable(old_state->crtc, plane); } static void mtk_plane_atomic_update(struct drm_plane *plane, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 00e1afd46b81..44df6410bce1 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -913,6 +913,11 @@ static const struct adreno_info a6xx_gpus[] = { { /* sentinel */ }, }, }, + .speedbins = ADRENO_SPEEDBINS( + { 0, 0 }, + { 185, 0 }, + { 127, 1 }, + ), }, { .chip_ids = ADRENO_CHIP_IDS( 0x06030001, @@ -1024,6 +1029,11 @@ static const struct adreno_info a6xx_gpus[] = { .gmu_cgc_mode = 0x00020200, .prim_fifo_threshold = 0x00300200, }, + .speedbins = ADRENO_SPEEDBINS( + { 0, 0 }, + { 169, 0 }, + { 113, 1 }, + ), }, { .chip_ids = ADRENO_CHIP_IDS(0x06030500), .family = ADRENO_6XX_GEN4, @@ -1343,6 +1353,69 @@ static const uint32_t a7xx_pwrup_reglist_regs[] = { DECLARE_ADRENO_REGLIST_LIST(a7xx_pwrup_reglist); +/* Applicable for X185, A750 */ +static const u32 a750_ifpc_reglist_regs[] = { + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), + REG_A6XX_TPL1_NC_MODE_CNTL, + REG_A6XX_SP_NC_MODE_CNTL, + REG_A6XX_CP_DBG_ECO_CNTL, + REG_A6XX_CP_PROTECT_CNTL, + REG_A6XX_CP_PROTECT(0), + REG_A6XX_CP_PROTECT(1), + REG_A6XX_CP_PROTECT(2), + REG_A6XX_CP_PROTECT(3), + REG_A6XX_CP_PROTECT(4), + REG_A6XX_CP_PROTECT(5), + REG_A6XX_CP_PROTECT(6), + REG_A6XX_CP_PROTECT(7), + REG_A6XX_CP_PROTECT(8), + REG_A6XX_CP_PROTECT(9), + REG_A6XX_CP_PROTECT(10), + REG_A6XX_CP_PROTECT(11), + REG_A6XX_CP_PROTECT(12), + REG_A6XX_CP_PROTECT(13), + REG_A6XX_CP_PROTECT(14), + REG_A6XX_CP_PROTECT(15), + REG_A6XX_CP_PROTECT(16), + REG_A6XX_CP_PROTECT(17), + REG_A6XX_CP_PROTECT(18), + REG_A6XX_CP_PROTECT(19), + REG_A6XX_CP_PROTECT(20), + REG_A6XX_CP_PROTECT(21), + REG_A6XX_CP_PROTECT(22), + REG_A6XX_CP_PROTECT(23), + REG_A6XX_CP_PROTECT(24), + REG_A6XX_CP_PROTECT(25), + REG_A6XX_CP_PROTECT(26), + REG_A6XX_CP_PROTECT(27), + REG_A6XX_CP_PROTECT(28), + REG_A6XX_CP_PROTECT(29), + REG_A6XX_CP_PROTECT(30), + REG_A6XX_CP_PROTECT(31), + REG_A6XX_CP_PROTECT(32), + REG_A6XX_CP_PROTECT(33), + REG_A6XX_CP_PROTECT(34), + REG_A6XX_CP_PROTECT(35), + REG_A6XX_CP_PROTECT(36), + REG_A6XX_CP_PROTECT(37), + REG_A6XX_CP_PROTECT(38), + REG_A6XX_CP_PROTECT(39), + REG_A6XX_CP_PROTECT(40), + REG_A6XX_CP_PROTECT(41), + REG_A6XX_CP_PROTECT(42), 
+ REG_A6XX_CP_PROTECT(43), + REG_A6XX_CP_PROTECT(44), + REG_A6XX_CP_PROTECT(45), + REG_A6XX_CP_PROTECT(46), + REG_A6XX_CP_PROTECT(47), +}; + +DECLARE_ADRENO_REGLIST_LIST(a750_ifpc_reglist); + static const struct adreno_info a7xx_gpus[] = { { .chip_ids = ADRENO_CHIP_IDS(0x07000200), @@ -1432,14 +1505,27 @@ static const struct adreno_info a7xx_gpus[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | ADRENO_QUIRK_HAS_HW_APRIV | - ADRENO_QUIRK_PREEMPTION, + ADRENO_QUIRK_PREEMPTION | + ADRENO_QUIRK_IFPC, .init = a6xx_gpu_init, .a6xx = &(const struct a6xx_info) { .hwcg = a740_hwcg, .protect = &a730_protect, .pwrup_reglist = &a7xx_pwrup_reglist, + .ifpc_reglist = &a750_ifpc_reglist, .gmu_chipid = 0x7050001, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(3), + .perfmode_bw = 16500000, + }, + { /* sentinel */ }, + }, }, .preempt_record_size = 4192 * SZ_1K, .speedbins = ADRENO_SPEEDBINS( @@ -1460,12 +1546,14 @@ static const struct adreno_info a7xx_gpus[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | ADRENO_QUIRK_HAS_HW_APRIV | - ADRENO_QUIRK_PREEMPTION, + ADRENO_QUIRK_PREEMPTION | + ADRENO_QUIRK_IFPC, .init = a6xx_gpu_init, .zapfw = "gen70900_zap.mbn", .a6xx = &(const struct a6xx_info) { .protect = &a730_protect, .pwrup_reglist = &a7xx_pwrup_reglist, + .ifpc_reglist = &a750_ifpc_reglist, .gmu_chipid = 0x7090100, .gmu_cgc_mode = 0x00020202, .bcms = (const struct a6xx_bcm[]) { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 28e6705c6da6..fc62fef2fed8 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -93,14 +93,25 @@ bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu) /* Check to see if the GX rail is still powered */ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) { + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; u32 val; /* This can be called from gpu state code so make sure GMU is valid */ if (!gmu->initialized) return false; + /* If GMU is absent, then GX power domain is ON as long as GPU is in active state */ + if (adreno_has_gmu_wrapper(adreno_gpu)) + return true; + val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); + if (adreno_is_a7xx(adreno_gpu)) + return !(val & + (A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | + A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); + return !(val & (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); @@ -272,6 +283,8 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu) if (ret) DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n"); + set_bit(GMU_STATUS_FW_START, &gmu->status); + return ret; } @@ -403,7 +416,10 @@ int a6xx_sptprac_enable(struct a6xx_gmu *gmu) int ret; u32 val; - if (!gmu->legacy) + WARN_ON(!gmu->legacy); + + /* Nothing to do if GMU does the power management */ + if (gmu->idle_level > GMU_IDLE_STATE_ACTIVE) return 0; gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778000); @@ -518,6 +534,9 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu) int ret; u32 val; + if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status)) + return 0; + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1)); ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val, @@ -545,6 +564,9 
@@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu) int ret; u32 val; + if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status)) + return; + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1); ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0, @@ -553,6 +575,8 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu) DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n"); gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0); + + set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status); } static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value) @@ -681,8 +705,6 @@ setup_pdc: /* ensure no writes happen before the uCode is fully written */ wmb(); - a6xx_rpmh_stop(gmu); - err: if (!IS_ERR_OR_NULL(pdcptr)) iounmap(pdcptr); @@ -842,19 +864,15 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) else gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1); - if (state == GMU_WARM_BOOT) { - ret = a6xx_rpmh_start(gmu); - if (ret) - return ret; - } else { + ret = a6xx_rpmh_start(gmu); + if (ret) + return ret; + + if (state == GMU_COLD_BOOT) { if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU], "GMU firmware is not loaded\n")) return -ENOENT; - ret = a6xx_rpmh_start(gmu); - if (ret) - return ret; - ret = a6xx_gmu_fw_load(gmu); if (ret) return ret; @@ -925,10 +943,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) ret = a6xx_gmu_gfx_rail_on(gmu); if (ret) return ret; - } - /* Enable SPTP_PC if the CPU is responsible for it */ - if (gmu->idle_level < GMU_IDLE_STATE_SPTP) { ret = a6xx_sptprac_enable(gmu); if (ret) return ret; @@ -980,6 +995,22 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu) val, (val & 1), 100, 10000); gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS + seqmem_off, val, (val & 1), 100, 1000); + + if (!adreno_is_a740_family(adreno_gpu)) + return; + + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS4_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS5_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS6_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS7_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 1000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS8_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS9_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 1000); } /* Force the GMU off in case it isn't responsive */ @@ -1023,6 +1054,8 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) /* Reset GPU core blocks */ a6xx_gpu_sw_reset(gpu, true); + + a6xx_rpmh_stop(gmu); } static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) @@ -1128,6 +1161,11 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) /* Set the GPU to the current freq */ a6xx_gmu_set_initial_freq(gpu, gmu); + if (refcount_read(&gpu->sysprof_active) > 1) { + ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + if (!ret) + set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status); + } out: /* On failure, shut down the GMU to leave it in a good state */ if (ret) { @@ -1175,6 +1213,9 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); } + if (test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + ret = a6xx_gmu_wait_for_idle(gmu); /* If the GMU isn't responding assume it is hung */ @@ -1318,8 +1359,6 @@ static int a6xx_gmu_memory_probe(struct 
drm_device *drm, struct a6xx_gmu *gmu) struct msm_mmu *mmu; mmu = msm_iommu_new(gmu->dev, 0); - if (!mmu) - return -ENODEV; if (IS_ERR(mmu)) return PTR_ERR(mmu); @@ -1692,6 +1731,7 @@ static int a6xx_gmu_acd_probe(struct a6xx_gmu *gmu) u32 val; freq = gmu->gpu_freqs[i]; + /* This is unlikely to fail because we are passing back a known freq */ opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, freq, true); np = dev_pm_opp_get_of_node(opp); @@ -1790,6 +1830,35 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev, return irq; } +void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + unsigned int sysprof_active; + + /* Nothing to do if GPU is suspended. We will handle this during GMU resume */ + if (!pm_runtime_get_if_active(&gpu->pdev->dev)) + return; + + mutex_lock(&gmu->lock); + + sysprof_active = refcount_read(&gpu->sysprof_active); + + /* + * 'Perfcounter select' register values are lost during IFPC collapse. To avoid that, + * use the currently unused perfcounter oob vote to block IFPC when sysprof is active + */ + if ((sysprof_active > 1) && !test_and_set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + else if ((sysprof_active == 1) && test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + + mutex_unlock(&gmu->lock); + + pm_runtime_put(&gpu->pdev->dev); +} + void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -1932,8 +2001,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) if (ret) return ret; - /* Fow now, don't do anything fancy until we get our feet under us */ - gmu->idle_level = GMU_IDLE_STATE_ACTIVE; + /* Set GMU idle level */ + gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ? + GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE; pm_runtime_enable(gmu->dev); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index d1ce11131ba6..06cfc294016f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -50,6 +50,9 @@ struct a6xx_bcm { /* The GMU does not do any idle state management */ #define GMU_IDLE_STATE_ACTIVE 0 +/* Unknown power state. Not exposed by the firmware. For documentation purpose only */ +#define GMU_IDLE_STATE_RESERVED 1 + /* The GMU manages SPTP power collapse */ #define GMU_IDLE_STATE_SPTP 2 @@ -117,6 +120,14 @@ struct a6xx_gmu { struct qmp *qmp; struct a6xx_hfi_msg_bw_table *bw_table; + +/* To check if we can trigger sleep seq at PDC. 
Cleared in a6xx_rpmh_stop() */ +#define GMU_STATUS_FW_START 0 +/* To track if PDC sleep seq was done */ +#define GMU_STATUS_PDC_SLEEP 1 +/* To track Perfcounter OOB set status */ +#define GMU_STATUS_OOB_PERF_SET 2 + unsigned long status; }; static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset) @@ -158,6 +169,9 @@ static inline u64 gmu_read64(struct a6xx_gmu *gmu, u32 lo, u32 hi) #define gmu_poll_timeout(gmu, addr, val, cond, interval, timeout) \ readl_poll_timeout((gmu)->mmio + ((addr) << 2), val, cond, \ interval, timeout) +#define gmu_poll_timeout_atomic(gmu, addr, val, cond, interval, timeout) \ + readl_poll_timeout_atomic((gmu)->mmio + ((addr) << 2), val, cond, \ + interval, timeout) static inline u32 gmu_read_rscc(struct a6xx_gmu *gmu, u32 offset) { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 45dd5fd1c2bf..b8f8ae940b55 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -16,6 +16,97 @@ #define GPU_PAS_ID 13 +static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu) +{ + u64 count_hi, count_lo, temp; + + do { + count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L); + temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + } while (unlikely(count_hi != temp)); + + return (count_hi << 32) | count_lo; +} + +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask) +{ + /* Success if !writedropped0/1 */ + if (!(status & mask)) + return true; + + udelay(10); + + /* Try to update fenced register again */ + gpu_write(gpu, offset, value); + + /* We can't do a posted write here because the power domain could be + * in collapse state. So use the heaviest barrier instead + */ + mb(); + return false; +} + +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + gpu_write(gpu, offset, value); + + /* Nothing else to be done in the case of no-GMU */ + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + /* We can't do a posted write here because the power domain could be + * in collapse state. 
So use the heaviest barrier instead + */ + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) + return 0; + + /* Try again for another 1ms before failing */ + gpu_write(gpu, offset, value); + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) { + /* + * The 'delay' warning is here because the pause to print this + * warning will allow gpu to move to power collapse which + * defeats the purpose of continuous polling for 2 ms + */ + dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n", + offset); + return 0; + } + + dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n", + offset); + + return -ETIMEDOUT; +} + +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b) +{ + int ret; + + ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask); + if (ret) + return ret; + + if (!is_64b) + return 0; + + ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask); + + return ret; +} + static inline bool _a6xx_check_idle(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -86,7 +177,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Update HW if this is the current ring and we are not in preempt*/ if (!a6xx_in_preempt(a6xx_gpu)) { if (a6xx_gpu->cur_ring == ring) - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); else ring->restore_wptr = true; } else { @@ -173,8 +264,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, * Needed for preemption */ OUT_PKT7(ring, CP_MEM_WRITE, 5); - OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); - OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr))); OUT_RING(ring, lower_32_bits(ttbr)); OUT_RING(ring, upper_32_bits(ttbr)); OUT_RING(ring, ctx->seqno); @@ -204,9 +295,9 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, */ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ)); - OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO( + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO( REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS)); - OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0)); + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0)); OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0)); @@ -298,8 +389,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno); - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); } @@ -499,8 +589,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) } - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); @@ -739,11 +828,10 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) u32 *dest = (u32 *)&lock->regs[0]; int i; - reglist = adreno_gpu->info->a6xx->pwrup_reglist; - lock->gpu_req = 
lock->cpu_req = lock->turn = 0; - lock->ifpc_list_len = 0; - lock->preemption_list_len = reglist->count; + + reglist = adreno_gpu->info->a6xx->ifpc_reglist; + lock->ifpc_list_len = reglist->count; /* * For each entry in each of the lists, write the offset and the current @@ -754,6 +842,14 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) *dest++ = gpu_read(gpu, reglist->regs[i]); } + reglist = adreno_gpu->info->a6xx->pwrup_reglist; + lock->preemption_list_len = reglist->count; + + for (i = 0; i < reglist->count; i++) { + *dest++ = reglist->regs[i]; + *dest++ = gpu_read(gpu, reglist->regs[i]); + } + /* * The overall register list is composed of * 1. Static IFPC-only registers @@ -1241,14 +1337,14 @@ static int hw_init(struct msm_gpu *gpu) /* Set weights for bicubic filtering */ if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); } @@ -1448,21 +1544,25 @@ static void a6xx_recover(struct msm_gpu *gpu) adreno_dump_info(gpu); - for (i = 0; i < 8; i++) - DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); + if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) { + /* Sometimes crashstate capture is skipped, so SQE should be halted here again */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + + for (i = 0; i < 8; i++) + DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); - if (hang_debug) - a6xx_dump(gpu); + if (hang_debug) + a6xx_dump(gpu); + + } /* * To handle recovery specific sequences during the rpm suspend we are * about to trigger */ - a6xx_gpu->hung = true; - /* Halt SQE first */ - gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + a6xx_gpu->hung = true; pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); @@ -1693,8 +1793,6 @@ static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) static void a6xx_fault_detect_irq(struct msm_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); /* @@ -1706,13 +1804,6 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) return; - /* - * Force the GPU to stay on until after we finish - * collecting information - */ - if (!adreno_has_gmu_wrapper(adreno_gpu)) - gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); - DRM_DEV_ERROR(&gpu->pdev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", ring ? ring->id : -1, ring ? 
ring->fctx->last_fence : 0, @@ -1727,6 +1818,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) /* Turn off the hangcheck timer to keep it from bothering us */ timer_delete(&gpu->hangcheck_timer); + /* Turn off interrupts to avoid triggering recovery again */ + gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0); + kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1751,9 +1845,49 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) } } +static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return; + + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on); +} + +static int irq_poll_fence(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) { + u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS); + + dev_err_ratelimited(&gpu->pdev->dev, + "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n", + status, rbbm_unmasked); + return -ETIMEDOUT; + } + + return 0; +} + static irqreturn_t a6xx_irq(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; + + /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */ + a6xx_gpu_keepalive_vote(gpu, true); + + if (irq_poll_fence(gpu)) + goto done; + u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); @@ -1790,6 +1924,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_CP_SW) a6xx_preempt_irq(gpu); +done: + a6xx_gpu_keepalive_vote(gpu, false); + return IRQ_HANDLED; } @@ -2179,16 +2316,7 @@ static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - mutex_lock(&a6xx_gpu->gmu.lock); - - /* Force the GPU power on so we can read this register */ - a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); - - a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - mutex_unlock(&a6xx_gpu->gmu.lock); + *value = read_gmu_ao_counter(a6xx_gpu); return 0; } @@ -2298,18 +2426,36 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) return a6xx_gpu->shadow[ring->id]; + /* + * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware + * without 'whereami' support + */ + WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC), + "Can't read CP_RB_RPTR register reliably\n"); + return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); } static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { - struct msm_cp_state cp_state = { + struct msm_cp_state cp_state; + bool progress; + + /* + * With IFPC, KMD doesn't know whether GX power domain is collapsed + * or not. So, we can't blindly read the below registers in GX domain. + * Lets trust the hang detection in HW and lie to the caller that + * there was progress. 
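+	 * A genuinely stuck GPU is still caught by the hardware fault-detect path (a6xx_fault_detect_irq()); only this software ringbuffer-progress heuristic is bypassed.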
+ */ + if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC) + return true; + + cp_state = (struct msm_cp_state) { .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), }; - bool progress; /* * Adjust the remaining data to account for what has already been @@ -2408,6 +2554,7 @@ static const struct adreno_gpu_funcs funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .get_timestamp = a6xx_gmu_get_timestamp, }; @@ -2468,6 +2615,7 @@ static const struct adreno_gpu_funcs funcs_a7xx = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .get_timestamp = a6xx_gmu_get_timestamp, }; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 6e71f617fc3d..0b17d36c36a9 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -45,6 +45,7 @@ struct a6xx_info { const struct adreno_reglist *hwcg; const struct adreno_protect *protect; const struct adreno_reglist_list *pwrup_reglist; + const struct adreno_reglist_list *ifpc_reglist; u32 gmu_chipid; u32 gmu_cgc_mode; u32 prim_fifo_threshold; @@ -254,6 +255,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); +void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu); void a6xx_preempt_init(struct msm_gpu *gpu); void a6xx_preempt_hw_init(struct msm_gpu *gpu); @@ -295,5 +297,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state); void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off); void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert); +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b); #endif /* __A6XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index faca2a0243ab..4c7f3c642f6a 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -11,7 +11,7 @@ static const unsigned int *gen7_0_0_external_core_regs[] __always_unused; static const unsigned int *gen7_2_0_external_core_regs[] __always_unused; static const unsigned int *gen7_9_0_external_core_regs[] __always_unused; -static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused; +static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused; static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused; #include "adreno_gen7_0_0_snapshot.h" @@ -174,8 +174,15 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu, static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, u32 *data) { - u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | - A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + u32 reg; + + if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { + reg = A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | + A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + } else { + reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | + A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + } gpu_write(gpu, 
REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); @@ -198,11 +205,18 @@ static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, readl((ptr) + ((offset) << 2)) /* read a value from the CX debug bus */ -static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset, +static int cx_debugbus_read(struct msm_gpu *gpu, void __iomem *cxdbg, u32 block, u32 offset, u32 *data) { - u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | - A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + u32 reg; + + if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { + reg = A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | + A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + } else { + reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | + A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + } cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); @@ -315,7 +329,8 @@ static void a6xx_get_debugbus_block(struct msm_gpu *gpu, ptr += debugbus_read(gpu, block->id, i, ptr); } -static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, +static void a6xx_get_cx_debugbus_block(struct msm_gpu *gpu, + void __iomem *cxdbg, struct a6xx_gpu_state *a6xx_state, const struct a6xx_debugbus_block *block, struct a6xx_gpu_state_obj *obj) @@ -330,7 +345,7 @@ static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, obj->handle = block; for (ptr = obj->data, i = 0; i < block->count; i++) - ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); + ptr += cx_debugbus_read(gpu, cxdbg, block->id, i, ptr); } static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu, @@ -423,8 +438,9 @@ static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu, a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]], &a6xx_state->debugbus[i + debugbus_blocks_count]); } - } + a6xx_state->nr_debugbus = total_debugbus_blocks; + } } static void a6xx_get_debugbus(struct msm_gpu *gpu, @@ -526,7 +542,8 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu, int i; for (i = 0; i < nr_cx_debugbus_blocks; i++) - a6xx_get_cx_debugbus_block(cxdbg, + a6xx_get_cx_debugbus_block(gpu, + cxdbg, a6xx_state, &cx_debugbus_blocks[i], &a6xx_state->cx_debugbus[i]); @@ -759,15 +776,15 @@ static void a7xx_get_cluster(struct msm_gpu *gpu, size_t datasize; int i, regcount = 0; - /* Some clusters need a selector register to be programmed too */ - if (cluster->sel) - in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val); - in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD, A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) | A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) | A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id)); + /* Some clusters need a selector register to be programmed too */ + if (cluster->sel) + in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val); + for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) { int count = RANGE(cluster->regs, i); @@ -1569,8 +1586,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), GFP_KERNEL); - bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & - A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); + bool stalled; if (!a6xx_state) return ERR_PTR(-ENOMEM); @@ -1591,15 +1607,20 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) } /* If GX isn't on the rest of the data isn't going to be accessible */ - if 
(!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) + if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) return &a6xx_state->base; + /* Halt SQE first */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + /* Get the banks of indexed registers */ if (adreno_is_a7xx(adreno_gpu)) a7xx_get_indexed_registers(gpu, a6xx_state); else a6xx_get_indexed_registers(gpu, a6xx_state); + stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & + A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); /* * Try to initialize the crashdumper, if we are not dumping state * with the SMMU stalled. The crashdumper needs memory access to @@ -1796,6 +1817,7 @@ static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj, print_name(p, " - type: ", a7xx_statetype_names[block->statetype]); print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]); + drm_printf(p, " - location: %d\n", block->location); for (i = 0; i < block->num_sps; i++) { drm_printf(p, " - sp: %d\n", i); @@ -1873,6 +1895,7 @@ static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]); print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]); drm_printf(p, " - context: %d\n", dbgahb->context_id); + drm_printf(p, " - location: %d\n", dbgahb->location_id); a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4); } } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h index 95d93ac6812a..1c18499b60bb 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h @@ -419,47 +419,47 @@ static const struct a6xx_indexed_registers a6xx_indexed_reglist[] = { REG_A6XX_CP_SQE_STAT_DATA, 0x33, NULL }, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0, a6xx_get_cp_roq_size}, }; static const struct a6xx_indexed_registers a7xx_indexed_reglist[] = { { "CP_SQE_STAT", REG_A6XX_CP_SQE_STAT_ADDR, - REG_A6XX_CP_SQE_STAT_DATA, 0x33, NULL }, + REG_A6XX_CP_SQE_STAT_DATA, 0x40, NULL }, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_BV_SQE_STAT_ADDR", REG_A7XX_CP_BV_SQE_STAT_ADDR, - REG_A7XX_CP_BV_SQE_STAT_DATA, 0x33, NULL }, - { "CP_BV_DRAW_STATE_ADDR", REG_A7XX_CP_BV_DRAW_STATE_ADDR, + { "CP_BV_SQE_STAT", REG_A7XX_CP_BV_SQE_STAT_ADDR, + REG_A7XX_CP_BV_SQE_STAT_DATA, 0x40, NULL }, + { "CP_BV_DRAW_STATE", REG_A7XX_CP_BV_DRAW_STATE_ADDR, REG_A7XX_CP_BV_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_BV_SQE_UCODE_DBG_ADDR", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, + { "CP_BV_SQE_UCODE_DBG", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, REG_A7XX_CP_BV_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_SQE_AC_STAT_ADDR", REG_A7XX_CP_SQE_AC_STAT_ADDR, - REG_A7XX_CP_SQE_AC_STAT_DATA, 0x33, NULL }, - { "CP_LPAC_DRAW_STATE_ADDR", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, + { "CP_SQE_AC_STAT", REG_A7XX_CP_SQE_AC_STAT_ADDR, + REG_A7XX_CP_SQE_AC_STAT_DATA, 0x40, NULL }, + { "CP_LPAC_DRAW_STATE", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, REG_A7XX_CP_LPAC_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_SQE_AC_UCODE_DBG_ADDR", 
REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, + { "CP_SQE_AC_UCODE_DBG", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, REG_A7XX_CP_SQE_AC_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_LPAC_FIFO_DBG_ADDR", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, + { "CP_LPAC_FIFO_DBG", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, REG_A7XX_CP_LPAC_FIFO_DBG_DATA, 0x40, NULL }, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0, a7xx_get_cp_roq_size }, }; static const struct a6xx_indexed_registers a6xx_cp_mempool_indexed = { - "CP_MEMPOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR, + "CP_MEM_POOL_DBG", REG_A6XX_CP_MEM_POOL_DBG_ADDR, REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2060, NULL, }; static const struct a6xx_indexed_registers a7xx_cp_bv_mempool_indexed[] = { - { "CP_MEMPOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR, - REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2100, NULL }, - { "CP_BV_MEMPOOL", REG_A7XX_CP_BV_MEM_POOL_DBG_ADDR, - REG_A7XX_CP_BV_MEM_POOL_DBG_DATA, 0x2100, NULL }, + { "CP_MEM_POOL_DBG", REG_A6XX_CP_MEM_POOL_DBG_ADDR, + REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2200, NULL }, + { "CP_BV_MEM_POOL_DBG", REG_A7XX_CP_BV_MEM_POOL_DBG_ADDR, + REG_A7XX_CP_BV_MEM_POOL_DBG_DATA, 0x2200, NULL }, }; #define DEBUGBUS(_id, _count) { .id = _id, .name = #_id, .count = _count } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index 8e69b1e84657..550de6ad68ef 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -21,6 +21,7 @@ static const char * const a6xx_hfi_msg_id[] = { HFI_MSG_ID(HFI_H2F_MSG_PERF_TABLE), HFI_MSG_ID(HFI_H2F_MSG_TEST), HFI_MSG_ID(HFI_H2F_MSG_START), + HFI_MSG_ID(HFI_H2F_FEATURE_CTRL), HFI_MSG_ID(HFI_H2F_MSG_CORE_FW_START), HFI_MSG_ID(HFI_H2F_MSG_GX_BW_PERF_VOTE), HFI_MSG_ID(HFI_H2F_MSG_PREPARE_SLUMBER), @@ -765,23 +766,40 @@ send: NULL, 0); } +static int a6xx_hfi_feature_ctrl_msg(struct a6xx_gmu *gmu, u32 feature, u32 enable, u32 data) +{ + struct a6xx_hfi_msg_feature_ctrl msg = { + .feature = feature, + .enable = enable, + .data = data, + }; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0); +} + +#define HFI_FEATURE_IFPC 9 +#define IFPC_LONG_HYST 0x1680 + +static int a6xx_hfi_enable_ifpc(struct a6xx_gmu *gmu) +{ + if (gmu->idle_level != GMU_IDLE_STATE_IFPC) + return 0; + + return a6xx_hfi_feature_ctrl_msg(gmu, HFI_FEATURE_IFPC, 1, IFPC_LONG_HYST); +} + #define HFI_FEATURE_ACD 12 static int a6xx_hfi_enable_acd(struct a6xx_gmu *gmu) { struct a6xx_hfi_acd_table *acd_table = &gmu->acd_table; - struct a6xx_hfi_msg_feature_ctrl msg = { - .feature = HFI_FEATURE_ACD, - .enable = 1, - .data = 0, - }; int ret; if (!acd_table->enable_by_level) return 0; /* Enable ACD feature at GMU */ - ret = a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0); + ret = a6xx_hfi_feature_ctrl_msg(gmu, HFI_FEATURE_ACD, 1, 0); if (ret) { DRM_DEV_ERROR(gmu->dev, "Unable to enable ACD (%d)\n", ret); return ret; @@ -898,6 +916,10 @@ int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state) if (ret) return ret; + ret = a6xx_hfi_enable_ifpc(gmu); + if (ret) + return ret; + ret = a6xx_hfi_send_core_fw_start(gmu); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c index 6a12a35dabff..afc5f4aa3b17 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c @@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu, } /* Write the most recent wptr for the given ring into the hardware */ -static 
inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring) { unsigned long flags; uint32_t wptr; @@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (ring->restore_wptr) { wptr = get_wptr(ring); - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); ring->restore_wptr = false; } @@ -111,9 +111,9 @@ static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu) postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6); postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ); - postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO( + postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_LO( REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS); - postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0); + postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_HI(0); postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1); postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1); postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0); @@ -136,6 +136,21 @@ static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu) a6xx_gpu->postamble_enabled = false; } +/* + * Set preemption keepalive vote. Please note that this vote is different from the one used in + * a6xx_irq() + */ +static void a6xx_preempt_keepalive_vote(struct msm_gpu *gpu, bool on) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return; + + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, on); +} + void a6xx_preempt_irq(struct msm_gpu *gpu) { uint32_t status; @@ -172,10 +187,12 @@ void a6xx_preempt_irq(struct msm_gpu *gpu) set_preempt_state(a6xx_gpu, PREEMPT_FINISH); - update_wptr(gpu, a6xx_gpu->cur_ring); + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); set_preempt_state(a6xx_gpu, PREEMPT_NONE); + a6xx_preempt_keepalive_vote(gpu, false); + trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id); /* @@ -268,7 +285,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) */ if (!ring || (a6xx_gpu->cur_ring == ring)) { set_preempt_state(a6xx_gpu, PREEMPT_FINISH); - update_wptr(gpu, a6xx_gpu->cur_ring); + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); set_preempt_state(a6xx_gpu, PREEMPT_NONE); spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags); return; @@ -302,13 +319,16 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) spin_unlock_irqrestore(&ring->preempt_lock, flags); - gpu_write64(gpu, - REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, - a6xx_gpu->preempt_smmu_iova[ring->id]); + /* Set the keepalive bit to keep the GPU ON until preemption is complete */ + a6xx_preempt_keepalive_vote(gpu, true); + + a6xx_fenced_write(a6xx_gpu, + REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id], + BIT(1), true); - gpu_write64(gpu, + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR, - a6xx_gpu->preempt_iova[ring->id]); + a6xx_gpu->preempt_iova[ring->id], BIT(1), true); a6xx_gpu->next_ring = ring; @@ -328,7 +348,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED); /* Trigger the preemption */ - gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false); } static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu, diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c index 50945bfe9b49..28f744f3caf7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -24,6 +24,10 @@ bool disable_acd; MODULE_PARM_DESC(disable_acd, "Forcefully disable GPU ACD"); module_param_unsafe(disable_acd, bool, 0400); +static bool skip_gpu; +MODULE_PARM_DESC(no_gpu, "Disable GPU driver register (0=enable GPU driver register (default), 1=skip GPU driver register"); +module_param(skip_gpu, bool, 0400); + extern const struct adreno_gpulist a2xx_gpulist; extern const struct adreno_gpulist a3xx_gpulist; extern const struct adreno_gpulist a4xx_gpulist; @@ -184,6 +188,9 @@ bool adreno_has_gpu(struct device_node *node) uint32_t chip_id; int ret; + if (skip_gpu) + return false; + ret = find_chipid(node, &chip_id); if (ret) return false; @@ -404,10 +411,16 @@ static struct platform_driver adreno_driver = { void __init adreno_register(void) { + if (skip_gpu) + return; + platform_driver_register(&adreno_driver); } void __exit adreno_unregister(void) { + if (skip_gpu) + return; + platform_driver_unregister(&adreno_driver); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h index cb66ece6606b..04b49d385f9d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h @@ -81,7 +81,7 @@ static const u32 gen7_0_0_debugbus_blocks[] = { A7XX_DBGBUS_USPTP_7, }; -static struct gen7_shader_block gen7_0_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_0_0_shader_blocks[] = { {A7XX_TP0_TMO_DATA, 0x200, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_SMO_DATA, 0x80, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_MIPMAP_BASE_DATA, 0x3c0, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, @@ -668,12 +668,19 @@ static const u32 gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers), 8)); -/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_BR */ -static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = { +/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_NONE */ +static const u32 gen7_0_0_tpl1_noncontext_pipe_none_registers[] = { 0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c, 0x0b60f, 0x0b621, 0x0b630, 0x0b633, UINT_MAX, UINT_MAX, }; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_none_registers), 8)); + +/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_BR */ +static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = { + 0x0b600, 0x0b600, + UINT_MAX, UINT_MAX, +}; static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_br_registers), 8)); /* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_LPAC */ @@ -695,7 +702,7 @@ static const struct gen7_sel_reg gen7_0_0_rb_rbp_sel = { .val = 0x9, }; -static struct gen7_cluster_registers gen7_0_0_clusters[] = { +static const struct gen7_cluster_registers gen7_0_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_0_0_noncontext_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -764,7 +771,7 @@ static struct gen7_cluster_registers gen7_0_0_clusters[] = { gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_0_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_0_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, 
gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -914,7 +921,7 @@ static const u32 gen7_0_0_dpm_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_0_0_dpm_registers), 8)); -static struct gen7_reg_list gen7_0_0_reg_list[] = { +static const struct gen7_reg_list gen7_0_0_reg_list[] = { { gen7_0_0_gpu_registers, NULL }, { gen7_0_0_cx_misc_registers, NULL }, { gen7_0_0_dpm_registers, NULL }, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h index 6f8ad50f32ce..772652eb61f3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h @@ -95,7 +95,7 @@ static const u32 gen7_2_0_debugbus_blocks[] = { A7XX_DBGBUS_CCHE_2, }; -static struct gen7_shader_block gen7_2_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_2_0_shader_blocks[] = { {A7XX_TP0_TMO_DATA, 0x200, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_SMO_DATA, 0x80, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_MIPMAP_BASE_DATA, 0x3c0, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, @@ -489,7 +489,7 @@ static const struct gen7_sel_reg gen7_2_0_rb_rbp_sel = { .val = 0x9, }; -static struct gen7_cluster_registers gen7_2_0_clusters[] = { +static const struct gen7_cluster_registers gen7_2_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_2_0_noncontext_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -558,7 +558,7 @@ static struct gen7_cluster_registers gen7_2_0_clusters[] = { gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -573,6 +573,8 @@ static struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 }, { A7XX_CLUSTER_NONE, A7XX_TP0_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_USPTP, gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 }, + { A7XX_CLUSTER_NONE, A7XX_TP0_NCTX_REG, A7XX_PIPE_NONE, 0, A7XX_USPTP, + gen7_0_0_tpl1_noncontext_pipe_none_registers, 0xb600 }, { A7XX_CLUSTER_NONE, A7XX_TP0_NCTX_REG, A7XX_PIPE_LPAC, 0, A7XX_USPTP, gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 }, { A7XX_CLUSTER_SP_PS, A7XX_SP_CTX0_3D_CPS_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, @@ -737,7 +739,7 @@ static const u32 gen7_2_0_dpm_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_2_0_dpm_registers), 8)); -static struct gen7_reg_list gen7_2_0_reg_list[] = { +static const struct gen7_reg_list gen7_2_0_reg_list[] = { { gen7_2_0_gpu_registers, NULL }, { gen7_2_0_cx_misc_registers, NULL }, { gen7_2_0_dpm_registers, NULL }, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h index e02cabb39f19..0956dfca1f05 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h @@ -117,7 +117,7 @@ static const u32 gen7_9_0_cx_debugbus_blocks[] = { A7XX_DBGBUS_GBIF_CX, }; -static struct gen7_shader_block gen7_9_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_9_0_shader_blocks[] = { { A7XX_TP0_TMO_DATA, 0x0200, 6, 2, A7XX_PIPE_BR, 
A7XX_USPTP }, { A7XX_TP0_SMO_DATA, 0x0080, 6, 2, A7XX_PIPE_BR, A7XX_USPTP }, { A7XX_TP0_MIPMAP_BASE_DATA, 0x03C0, 6, 2, A7XX_PIPE_BR, A7XX_USPTP }, @@ -1116,7 +1116,7 @@ static const struct gen7_sel_reg gen7_9_0_rb_rbp_sel = { .val = 0x9, }; -static struct gen7_cluster_registers gen7_9_0_clusters[] = { +static const struct gen7_cluster_registers gen7_9_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_9_0_non_context_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -1185,7 +1185,7 @@ static struct gen7_cluster_registers gen7_9_0_clusters[] = { gen7_9_0_vpc_pipe_bv_cluster_vpc_ps_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, gen7_9_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00}, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -1294,34 +1294,34 @@ static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, }; -static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { +static const struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { { "CP_SQE_STAT", REG_A6XX_CP_SQE_STAT_ADDR, REG_A6XX_CP_SQE_STAT_DATA, 0x00040}, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x00200}, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0x00800}, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x08000}, - { "CP_BV_DRAW_STATE_ADDR", REG_A7XX_CP_BV_DRAW_STATE_ADDR, + { "CP_BV_DRAW_STATE", REG_A7XX_CP_BV_DRAW_STATE_ADDR, REG_A7XX_CP_BV_DRAW_STATE_DATA, 0x00200}, - { "CP_BV_ROQ_DBG_ADDR", REG_A7XX_CP_BV_ROQ_DBG_ADDR, + { "CP_BV_ROQ_DBG", REG_A7XX_CP_BV_ROQ_DBG_ADDR, REG_A7XX_CP_BV_ROQ_DBG_DATA, 0x00800}, - { "CP_BV_SQE_UCODE_DBG_ADDR", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, + { "CP_BV_SQE_UCODE_DBG", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, REG_A7XX_CP_BV_SQE_UCODE_DBG_DATA, 0x08000}, - { "CP_BV_SQE_STAT_ADDR", REG_A7XX_CP_BV_SQE_STAT_ADDR, + { "CP_BV_SQE_STAT", REG_A7XX_CP_BV_SQE_STAT_ADDR, REG_A7XX_CP_BV_SQE_STAT_DATA, 0x00040}, - { "CP_RESOURCE_TBL", REG_A7XX_CP_RESOURCE_TABLE_DBG_ADDR, + { "CP_RESOURCE_TABLE_DBG", REG_A7XX_CP_RESOURCE_TABLE_DBG_ADDR, REG_A7XX_CP_RESOURCE_TABLE_DBG_DATA, 0x04100}, - { "CP_LPAC_DRAW_STATE_ADDR", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, + { "CP_LPAC_DRAW_STATE", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, REG_A7XX_CP_LPAC_DRAW_STATE_DATA, 0x00200}, - { "CP_LPAC_ROQ", REG_A7XX_CP_LPAC_ROQ_DBG_ADDR, + { "CP_LPAC_ROQ_DBG", REG_A7XX_CP_LPAC_ROQ_DBG_ADDR, REG_A7XX_CP_LPAC_ROQ_DBG_DATA, 0x00200}, - { "CP_SQE_AC_UCODE_DBG_ADDR", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, + { "CP_SQE_AC_UCODE_DBG", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, REG_A7XX_CP_SQE_AC_UCODE_DBG_DATA, 0x08000}, - { "CP_SQE_AC_STAT_ADDR", REG_A7XX_CP_SQE_AC_STAT_ADDR, + { "CP_SQE_AC_STAT", REG_A7XX_CP_SQE_AC_STAT_ADDR, REG_A7XX_CP_SQE_AC_STAT_DATA, 0x00040}, - { "CP_LPAC_FIFO_DBG_ADDR", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, + { "CP_LPAC_FIFO_DBG", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, REG_A7XX_CP_LPAC_FIFO_DBG_DATA, 0x00040}, { "CP_AQE_ROQ_0", REG_A7XX_CP_AQE_ROQ_DBG_ADDR_0, REG_A7XX_CP_AQE_ROQ_DBG_DATA_0, 0x00100}, @@ -1337,7 +1337,7 @@ static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { 
REG_A7XX_CP_AQE_STAT_DATA_1, 0x00040}, }; -static struct gen7_reg_list gen7_9_0_reg_list[] = { +static const struct gen7_reg_list gen7_9_0_reg_list[] = { { gen7_9_0_gpu_registers, NULL}, { gen7_9_0_cx_misc_registers, NULL}, { gen7_9_0_cx_dbgc_registers, NULL}, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f1230465bf0d..afaa3cfefd35 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -10,7 +10,7 @@ #include <linux/interconnect.h> #include <linux/firmware/qcom/qcom_scm.h> #include <linux/kernel.h> -#include <linux/of_address.h> +#include <linux/of_reserved_mem.h> #include <linux/pm_opp.h> #include <linux/slab.h> #include <linux/soc/qcom/mdt_loader.h> @@ -33,7 +33,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, struct device *dev = &gpu->pdev->dev; const struct firmware *fw; const char *signed_fwname = NULL; - struct device_node *np, *mem_np; + struct device_node *np; struct resource r; phys_addr_t mem_phys; ssize_t mem_size; @@ -51,18 +51,11 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, return -ENODEV; } - mem_np = of_parse_phandle(np, "memory-region", 0); - of_node_put(np); - if (!mem_np) { + ret = of_reserved_mem_region_to_resource(np, 0, &r); + if (ret) { zap_available = false; - return -EINVAL; - } - - ret = of_address_to_resource(mem_np, 0, &r); - of_node_put(mem_np); - if (ret) return ret; - + } mem_phys = r.start; /* @@ -209,9 +202,7 @@ adreno_iommu_create_vm(struct msm_gpu *gpu, u64 start, size; mmu = msm_iommu_gpu_new(&pdev->dev, gpu, quirks); - if (!mmu) - return ERR_PTR(-ENODEV); - else if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return ERR_CAST(mmu); geometry = msm_iommu_get_geometry(mmu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 9dc93c247196..390fa6720d9b 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -59,6 +59,7 @@ enum adreno_family { #define ADRENO_QUIRK_HAS_CACHED_COHERENT BIT(4) #define ADRENO_QUIRK_PREEMPTION BIT(5) #define ADRENO_QUIRK_4GB_VA BIT(6) +#define ADRENO_QUIRK_IFPC BIT(7) /* Helper for formating the chip_id in the way that userspace tools like * crashdec expect. diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index 0fb5789c60d0..13cc658065c5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -32,6 +32,26 @@ enum dpu_perf_mode { }; /** + * dpu_core_perf_adjusted_mode_clk - Adjust given mode clock rate according to + * the perf clock factor. 
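+ * The factor is a percentage: for example, a clk_inefficiency_factor of 105 turns a 300 MHz mode clock into 315 MHz, while a factor of 0 leaves the rate unchanged.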
+ * @mode_clk_rate: Unadjusted mode clock rate + * @perf_cfg: performance configuration + */ +u64 dpu_core_perf_adjusted_mode_clk(u64 mode_clk_rate, + const struct dpu_perf_cfg *perf_cfg) +{ + u32 clk_factor; + + clk_factor = perf_cfg->clk_inefficiency_factor; + if (clk_factor) { + mode_clk_rate *= clk_factor; + do_div(mode_clk_rate, 100); + } + + return mode_clk_rate; +} + +/** * _dpu_core_perf_calc_bw() - to calculate BW per crtc * @perf_cfg: performance configuration * @crtc: pointer to a crtc @@ -75,28 +95,21 @@ static u64 _dpu_core_perf_calc_clk(const struct dpu_perf_cfg *perf_cfg, struct drm_plane *plane; struct dpu_plane_state *pstate; struct drm_display_mode *mode; - u64 crtc_clk; - u32 clk_factor; + u64 mode_clk; mode = &state->adjusted_mode; - crtc_clk = (u64)mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode); + mode_clk = (u64)mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode); drm_atomic_crtc_for_each_plane(plane, crtc) { pstate = to_dpu_plane_state(plane->state); if (!pstate) continue; - crtc_clk = max(pstate->plane_clk, crtc_clk); - } - - clk_factor = perf_cfg->clk_inefficiency_factor; - if (clk_factor) { - crtc_clk *= clk_factor; - do_div(crtc_clk, 100); + mode_clk = max(pstate->plane_clk, mode_clk); } - return crtc_clk; + return dpu_core_perf_adjusted_mode_clk(mode_clk, perf_cfg); } static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h index d2f21d34e501..3740bc97422c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h @@ -54,6 +54,9 @@ struct dpu_core_perf { u32 fix_core_ab_vote; }; +u64 dpu_core_perf_adjusted_mode_clk(u64 clk_rate, + const struct dpu_perf_cfg *perf_cfg); + int dpu_core_perf_crtc_check(struct drm_crtc *crtc, struct drm_crtc_state *state); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index d4b545448d74..4b970a59deaf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -377,11 +377,10 @@ static void _dpu_crtc_setup_blend_cfg(struct dpu_crtc_mixer *mixer, static void _dpu_crtc_program_lm_output_roi(struct drm_crtc *crtc) { struct dpu_crtc_state *crtc_state; - int lm_idx, lm_horiz_position; + int lm_idx; crtc_state = to_dpu_crtc_state(crtc->state); - lm_horiz_position = 0; for (lm_idx = 0; lm_idx < crtc_state->num_mixers; lm_idx++) { const struct drm_rect *lm_roi = &crtc_state->lm_bounds[lm_idx]; struct dpu_hw_mixer *hw_lm = crtc_state->mixers[lm_idx].hw_lm; @@ -392,7 +391,7 @@ static void _dpu_crtc_program_lm_output_roi(struct drm_crtc *crtc) cfg.out_width = drm_rect_width(lm_roi); cfg.out_height = drm_rect_height(lm_roi); - cfg.right_mixer = lm_horiz_position++; + cfg.right_mixer = lm_idx & 0x1; cfg.flags = 0; hw_lm->ops.setup_mixer_out(hw_lm, &cfg); } @@ -596,7 +595,7 @@ static void _dpu_crtc_complete_flip(struct drm_crtc *crtc) spin_lock_irqsave(&dev->event_lock, flags); if (dpu_crtc->event) { - DRM_DEBUG_VBL("%s: send event: %pK\n", dpu_crtc->name, + DRM_DEBUG_VBL("%s: send event: %p\n", dpu_crtc->name, dpu_crtc->event); trace_dpu_crtc_complete_flip(DRMID(crtc)); drm_crtc_send_vblank_event(crtc, dpu_crtc->event); @@ -1534,6 +1533,7 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode) { struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc); + u64 adjusted_mode_clk; /* if there is no 3d_mux block we cannot merge LMs so 
we cannot * split the large layer into 2 LMs, filter out such modes @@ -1541,6 +1541,17 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, if (!dpu_kms->catalog->caps->has_3d_merge && mode->hdisplay > dpu_kms->catalog->caps->max_mixer_width) return MODE_BAD_HVALUE; + + adjusted_mode_clk = dpu_core_perf_adjusted_mode_clk(mode->clock, + dpu_kms->perf.perf_cfg); + + /* + * The given mode, adjusted for the perf clock factor, should not exceed + * the max core clock rate + */ + if (dpu_kms->perf.max_core_clk_rate < adjusted_mode_clk * 1000) + return MODE_CLOCK_HIGH; + /* * max crtc width is equal to the max mixer width * 2 and max height is 4K */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 05e5f3463e30..258edaa18fc0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -730,6 +730,8 @@ bool dpu_encoder_needs_modeset(struct drm_encoder *drm_enc, struct drm_atomic_st return false; conn_state = drm_atomic_get_new_connector_state(state, connector); + if (!conn_state) + return false; /** * These checks are duplicated from dpu_encoder_update_topology() since diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 56a5b596554d..46f348972a97 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -446,7 +446,7 @@ static void _dpu_encoder_phys_wb_handle_wbdone_timeout( static int dpu_encoder_phys_wb_wait_for_commit_done( struct dpu_encoder_phys *phys_enc) { - unsigned long ret; + int ret; struct dpu_encoder_wait_info wait_info; struct dpu_encoder_phys_wb *wb_enc = to_dpu_encoder_phys_wb(phys_enc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index e824cd64fd3f..6641455c4ec6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -338,7 +338,6 @@ static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK(); *************************************************************/ static const struct dpu_lm_sub_blks msm8998_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 7, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x50, 0x80, 0xb0, 0x230, @@ -347,7 +346,6 @@ static const struct dpu_lm_sub_blks msm8998_lm_sblk = { }; static const struct dpu_lm_sub_blks sdm845_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 11, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68, 0x80, 0x98, @@ -356,7 +354,6 @@ static const struct dpu_lm_sub_blks sdm845_lm_sblk = { }; static const struct dpu_lm_sub_blks sc7180_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 7, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68, 0x80, 0x98, 0xb0 @@ -364,7 +361,6 @@ static const struct dpu_lm_sub_blks sc7180_lm_sblk = { }; static const struct dpu_lm_sub_blks sm8750_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 11, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ /* 0x40 + n*0x30 */ @@ -374,7 +370,6 @@ static const struct dpu_lm_sub_blks sm8750_lm_sblk = { }; static const struct dpu_lm_sub_blks qcm2290_lm_sblk = { - .maxwidth = 
DEFAULT_DPU_LINE_WIDTH, .maxblendstages = 4, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68 diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index a78bb2c334e3..f0768f54e9b3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -307,7 +307,6 @@ struct dpu_sspp_sub_blks { * @blendstage_base: Blend-stage register base offset */ struct dpu_lm_sub_blks { - u32 maxwidth; u32 maxblendstages; u32 blendstage_base[MAX_BLOCKS]; }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index 11fb1bc54fa9..54b20faa0b69 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -31,14 +31,14 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx, u32 base; if (!ctx) { - DRM_ERROR("invalid ctx %pK\n", ctx); + DRM_ERROR("invalid ctx %p\n", ctx); return; } base = ctx->cap->sblk->pcc.base; if (!base) { - DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base); + DRM_ERROR("invalid ctx %p pcc base 0x%x\n", ctx, base); return; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 12dcb32b4724..4e5a8ecd31f7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1110,7 +1110,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) { struct drm_gpuvm *vm; - vm = msm_kms_init_vm(dpu_kms->dev); + vm = msm_kms_init_vm(dpu_kms->dev, dpu_kms->dev->dev->parent); if (IS_ERR(vm)) return PTR_ERR(vm); @@ -1345,7 +1345,7 @@ static int dpu_kms_mmap_mdp5(struct dpu_kms *dpu_kms) dpu_kms->mmio = NULL; return ret; } - DRM_DEBUG("mapped dpu address space @%pK\n", dpu_kms->mmio); + DRM_DEBUG("mapped dpu address space @%p\n", dpu_kms->mmio); dpu_kms->vbif[VBIF_RT] = msm_ioremap_mdss(mdss_dev, dpu_kms->pdev, @@ -1380,7 +1380,7 @@ static int dpu_kms_mmap_dpu(struct dpu_kms *dpu_kms) dpu_kms->mmio = NULL; return ret; } - DRM_DEBUG("mapped dpu address space @%pK\n", dpu_kms->mmio); + DRM_DEBUG("mapped dpu address space @%p\n", dpu_kms->mmio); dpu_kms->vbif[VBIF_RT] = msm_ioremap(pdev, "vbif"); if (IS_ERR(dpu_kms->vbif[VBIF_RT])) { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 01171c535a27..f54cf0faa1c7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -922,6 +922,9 @@ static int dpu_plane_is_multirect_capable(struct dpu_hw_sspp *sspp, if (MSM_FORMAT_IS_YUV(fmt)) return false; + if (!sspp) + return true; + if (!test_bit(DPU_SSPP_SMART_DMA_V1, &sspp->cap->features) && !test_bit(DPU_SSPP_SMART_DMA_V2, &sspp->cap->features)) return false; @@ -1028,6 +1031,7 @@ static int dpu_plane_try_multirect_shared(struct dpu_plane_state *pstate, prev_pipe->multirect_mode != DPU_SSPP_MULTIRECT_NONE) return false; + /* Do not validate SSPP of current plane when it is not ready */ if (!dpu_plane_is_multirect_capable(pipe->sspp, pipe_cfg, fmt) || !dpu_plane_is_multirect_capable(prev_pipe->sspp, prev_pipe_cfg, prev_fmt)) return false; @@ -1129,7 +1133,7 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane); struct dpu_plane_state *pstate = to_dpu_plane_state(plane_state); - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *crtc_state = NULL; int ret; if 
(IS_ERR(plane_state)) @@ -1162,7 +1166,7 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, if (!old_plane_state || !old_plane_state->fb || old_plane_state->src_w != plane_state->src_w || old_plane_state->src_h != plane_state->src_h || - old_plane_state->src_w != plane_state->src_w || + old_plane_state->crtc_w != plane_state->crtc_w || old_plane_state->crtc_h != plane_state->crtc_h || msm_framebuffer_format(old_plane_state->fb) != msm_framebuffer_format(plane_state->fb)) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 25382120cb1a..2c77c74fac0f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -865,6 +865,21 @@ void dpu_rm_release_all_sspp(struct dpu_global_state *global_state, ARRAY_SIZE(global_state->sspp_to_crtc_id), crtc_id); } +static char *dpu_hw_blk_type_name[] = { + [DPU_HW_BLK_TOP] = "TOP", + [DPU_HW_BLK_SSPP] = "SSPP", + [DPU_HW_BLK_LM] = "LM", + [DPU_HW_BLK_CTL] = "CTL", + [DPU_HW_BLK_PINGPONG] = "pingpong", + [DPU_HW_BLK_INTF] = "INTF", + [DPU_HW_BLK_WB] = "WB", + [DPU_HW_BLK_DSPP] = "DSPP", + [DPU_HW_BLK_MERGE_3D] = "merge_3d", + [DPU_HW_BLK_DSC] = "DSC", + [DPU_HW_BLK_CDM] = "CDM", + [DPU_HW_BLK_MAX] = "unknown", +}; + /** * dpu_rm_get_assigned_resources - Get hw resources of the given type that are * assigned to this encoder @@ -946,13 +961,13 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm, } if (num_blks == blks_size) { - DPU_ERROR("More than %d resources assigned to crtc %d\n", - blks_size, crtc_id); + DPU_ERROR("More than %d %s assigned to crtc %d\n", + blks_size, dpu_hw_blk_type_name[type], crtc_id); break; } if (!hw_blks[i]) { - DPU_ERROR("Allocated resource %d unavailable to assign to crtc %d\n", - type, crtc_id); + DPU_ERROR("%s unavailable to assign to crtc %d\n", + dpu_hw_blk_type_name[type], crtc_id); break; } blks[num_blks++] = hw_blks[i]; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 8ff496082902..cd73468e369a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -80,7 +80,6 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector, static const struct drm_connector_funcs dpu_wb_conn_funcs = { .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; @@ -131,12 +130,9 @@ int dpu_writeback_init(struct drm_device *dev, struct drm_encoder *enc, drm_connector_helper_add(&dpu_wb_conn->base.base, &dpu_wb_conn_helper_funcs); - /* DPU initializes the encoder and sets it up completely for writeback - * cases and hence should use the new API drm_writeback_connector_init_with_encoder - * to initialize the writeback connector - */ - rc = drm_writeback_connector_init_with_encoder(dev, &dpu_wb_conn->base, enc, - &dpu_wb_conn_funcs, format_list, num_formats); + rc = drmm_writeback_connector_init(dev, &dpu_wb_conn->base, + &dpu_wb_conn_funcs, enc, + format_list, num_formats); if (!rc) dpu_wb_conn->wb_enc = enc; diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 0952c7f18abd..809ca191e9de 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -391,11 +391,9 @@ static void read_mdp_hw_revision(struct 
mdp4_kms *mdp4_kms, static int mdp4_kms_init(struct drm_device *dev) { - struct platform_device *pdev = to_platform_device(dev->dev); struct msm_drm_private *priv = dev->dev_private; struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(priv->kms)); struct msm_kms *kms = NULL; - struct msm_mmu *mmu; struct drm_gpuvm *vm; int ret; u32 major, minor; @@ -458,29 +456,14 @@ static int mdp4_kms_init(struct drm_device *dev) mdp4_disable(mdp4_kms); mdelay(16); - mmu = msm_iommu_new(&pdev->dev, 0); - if (IS_ERR(mmu)) { - ret = PTR_ERR(mmu); + vm = msm_kms_init_vm(mdp4_kms->dev, NULL); + if (IS_ERR(vm)) { + ret = PTR_ERR(vm); goto fail; - } else if (!mmu) { - DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys " - "contig buffers for scanout\n"); - vm = NULL; - } else { - vm = msm_gem_vm_create(dev, mmu, "mdp4", - 0x1000, 0x100000000 - 0x1000, - true); - - if (IS_ERR(vm)) { - if (!IS_ERR(mmu)) - mmu->funcs->destroy(mmu); - ret = PTR_ERR(vm); - goto fail; - } - - kms->vm = vm; } + kms->vm = vm; + ret = modeset_init(mdp4_kms); if (ret) { DRM_DEV_ERROR(dev->dev, "modeset_init failed: %d\n", ret); @@ -529,7 +512,7 @@ static int mdp4_probe(struct platform_device *pdev) mdp4_kms = devm_kzalloc(dev, sizeof(*mdp4_kms), GFP_KERNEL); if (!mdp4_kms) - return dev_err_probe(dev, -ENOMEM, "failed to allocate kms\n"); + return -ENOMEM; mdp4_kms->mmio = msm_ioremap(pdev, NULL); if (IS_ERR(mdp4_kms->mmio)) diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h index fb348583dc84..06458d4ee48c 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h @@ -202,6 +202,6 @@ static inline struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev) } #endif -struct clk *mpd4_get_lcdc_clock(struct drm_device *dev); +struct clk *mdp4_get_lcdc_clock(struct drm_device *dev); #endif /* __MDP4_KMS_H__ */ diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 06a307c1272d..1051873057f6 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -375,7 +375,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev) drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs); - mdp4_lcdc_encoder->lcdc_clk = mpd4_get_lcdc_clock(dev); + mdp4_lcdc_encoder->lcdc_clk = mdp4_get_lcdc_clock(dev); if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get lvds_clk\n"); return ERR_CAST(mdp4_lcdc_encoder->lcdc_clk); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c index fa2c29470510..04c49bf3d854 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c @@ -54,7 +54,7 @@ static const struct pll_rate *find_rate(unsigned long rate) return &freqtbl[i-1]; } -static int mpd4_lvds_pll_enable(struct clk_hw *hw) +static int mdp4_lvds_pll_enable(struct clk_hw *hw) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); struct mdp4_kms *mdp4_kms = get_kms(lvds_pll); @@ -80,7 +80,7 @@ static int mpd4_lvds_pll_enable(struct clk_hw *hw) return 0; } -static void mpd4_lvds_pll_disable(struct clk_hw *hw) +static void mdp4_lvds_pll_disable(struct clk_hw *hw) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); struct mdp4_kms *mdp4_kms = get_kms(lvds_pll); @@ -91,21 +91,24 @@ static void mpd4_lvds_pll_disable(struct clk_hw *hw) mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_PLL_CTRL_0, 0x0); } -static 
unsigned long mpd4_lvds_pll_recalc_rate(struct clk_hw *hw, +static unsigned long mdp4_lvds_pll_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); return lvds_pll->pixclk; } -static long mpd4_lvds_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) +static int mdp4_lvds_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - const struct pll_rate *pll_rate = find_rate(rate); - return pll_rate->rate; + const struct pll_rate *pll_rate = find_rate(req->rate); + + req->rate = pll_rate->rate; + + return 0; } -static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, +static int mdp4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); @@ -114,26 +117,26 @@ static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, } -static const struct clk_ops mpd4_lvds_pll_ops = { - .enable = mpd4_lvds_pll_enable, - .disable = mpd4_lvds_pll_disable, - .recalc_rate = mpd4_lvds_pll_recalc_rate, - .round_rate = mpd4_lvds_pll_round_rate, - .set_rate = mpd4_lvds_pll_set_rate, +static const struct clk_ops mdp4_lvds_pll_ops = { + .enable = mdp4_lvds_pll_enable, + .disable = mdp4_lvds_pll_disable, + .recalc_rate = mdp4_lvds_pll_recalc_rate, + .determine_rate = mdp4_lvds_pll_determine_rate, + .set_rate = mdp4_lvds_pll_set_rate, }; -static const struct clk_parent_data mpd4_lvds_pll_parents[] = { +static const struct clk_parent_data mdp4_lvds_pll_parents[] = { { .fw_name = "pxo", .name = "pxo", }, }; static struct clk_init_data pll_init = { - .name = "mpd4_lvds_pll", - .ops = &mpd4_lvds_pll_ops, - .parent_data = mpd4_lvds_pll_parents, - .num_parents = ARRAY_SIZE(mpd4_lvds_pll_parents), + .name = "mdp4_lvds_pll", + .ops = &mdp4_lvds_pll_ops, + .parent_data = mdp4_lvds_pll_parents, + .num_parents = ARRAY_SIZE(mdp4_lvds_pll_parents), }; -static struct clk_hw *mpd4_lvds_pll_init(struct drm_device *dev) +static struct clk_hw *mdp4_lvds_pll_init(struct drm_device *dev) { struct mdp4_lvds_pll *lvds_pll; int ret; @@ -156,14 +159,14 @@ static struct clk_hw *mpd4_lvds_pll_init(struct drm_device *dev) return &lvds_pll->pll_hw; } -struct clk *mpd4_get_lcdc_clock(struct drm_device *dev) +struct clk *mdp4_get_lcdc_clock(struct drm_device *dev) { struct clk_hw *hw; struct clk *clk; /* TODO: do we need different pll in other cases? 
*/ - hw = mpd4_lvds_pll_init(dev); + hw = mdp4_lvds_pll_init(dev); if (IS_ERR(hw)) { DRM_DEV_ERROR(dev->dev, "failed to register LVDS PLL\n"); return ERR_CAST(hw); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 5b6ca8dd929e..61edf6864092 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -534,7 +534,7 @@ static int mdp5_kms_init(struct drm_device *dev) } mdelay(16); - vm = msm_kms_init_vm(mdp5_kms->dev); + vm = msm_kms_init_vm(mdp5_kms->dev, pdev->dev.parent); if (IS_ERR(vm)) { ret = PTR_ERR(vm); goto fail; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 221f12db5f8b..4ea681130dba 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -5,6 +5,8 @@ #include <linux/clk-provider.h> #include <linux/platform_device.h> +#include <linux/pm_clock.h> +#include <linux/pm_runtime.h> #include <dt-bindings/phy/phy.h> #include "dsi_phy.h" @@ -511,30 +513,6 @@ int msm_dsi_cphy_timing_calc_v4(struct msm_dsi_dphy_timing *timing, return 0; } -static int dsi_phy_enable_resource(struct msm_dsi_phy *phy) -{ - struct device *dev = &phy->pdev->dev; - int ret; - - ret = pm_runtime_resume_and_get(dev); - if (ret) - return ret; - - ret = clk_prepare_enable(phy->ahb_clk); - if (ret) { - DRM_DEV_ERROR(dev, "%s: can't enable ahb clk, %d\n", __func__, ret); - pm_runtime_put_sync(dev); - } - - return ret; -} - -static void dsi_phy_disable_resource(struct msm_dsi_phy *phy) -{ - clk_disable_unprepare(phy->ahb_clk); - pm_runtime_put(&phy->pdev->dev); -} - static const struct of_device_id dsi_phy_dt_match[] = { #ifdef CONFIG_DRM_MSM_DSI_28NM_PHY { .compatible = "qcom,dsi-phy-28nm-hpm", @@ -698,22 +676,20 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) if (ret) return ret; - phy->ahb_clk = msm_clk_get(pdev, "iface"); - if (IS_ERR(phy->ahb_clk)) - return dev_err_probe(dev, PTR_ERR(phy->ahb_clk), - "Unable to get ahb clk\n"); + platform_set_drvdata(pdev, phy); - ret = devm_pm_runtime_enable(&pdev->dev); + ret = devm_pm_runtime_enable(dev); if (ret) return ret; - /* PLL init will call into clk_register which requires - * register access, so we need to enable power and ahb clock. 
- */ - ret = dsi_phy_enable_resource(phy); + ret = devm_pm_clk_create(dev); if (ret) return ret; + ret = pm_clk_add(dev, "iface"); + if (ret < 0) + return dev_err_probe(dev, ret, "Unable to get iface clk\n"); + if (phy->cfg->ops.pll_init) { ret = phy->cfg->ops.pll_init(phy); if (ret) @@ -727,18 +703,19 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "Failed to register clk provider\n"); - dsi_phy_disable_resource(phy); - - platform_set_drvdata(pdev, phy); - return 0; } +static const struct dev_pm_ops dsi_phy_pm_ops = { + SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; + static struct platform_driver dsi_phy_platform_driver = { .probe = dsi_phy_driver_probe, .driver = { .name = "msm_dsi_phy", .of_match_table = dsi_phy_dt_match, + .pm = &dsi_phy_pm_ops, }, }; @@ -764,9 +741,9 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, dev = &phy->pdev->dev; - ret = dsi_phy_enable_resource(phy); + ret = pm_runtime_resume_and_get(dev); if (ret) { - DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n", + DRM_DEV_ERROR(dev, "%s: resume failed, %d\n", __func__, ret); goto res_en_fail; } @@ -810,7 +787,7 @@ pll_restor_fail: phy_en_fail: regulator_bulk_disable(phy->cfg->num_regulators, phy->supplies); reg_en_fail: - dsi_phy_disable_resource(phy); + pm_runtime_put(dev); res_en_fail: return ret; } @@ -823,7 +800,7 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) phy->cfg->ops.disable(phy); regulator_bulk_disable(phy->cfg->num_regulators, phy->supplies); - dsi_phy_disable_resource(phy); + pm_runtime_put(&phy->pdev->dev); } void msm_dsi_phy_set_usecase(struct msm_dsi_phy *phy, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c558f8df1684..e391505fdaf0 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -104,12 +104,12 @@ struct msm_dsi_phy { phys_addr_t lane_size; int id; - struct clk *ahb_clk; struct regulator_bulk_data *supplies; struct msm_dsi_dphy_timing timing; const struct msm_dsi_phy_cfg *cfg; void *tuning_cfg; + void *pll_data; enum msm_dsi_phy_usecase usecase; bool regulator_ldo_mode; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index af2e30f3f842..ec486ff02c9b 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -444,21 +444,19 @@ static unsigned long dsi_pll_10nm_vco_recalc_rate(struct clk_hw *hw, return (unsigned long)vco_rate; } -static long dsi_pll_10nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_10nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_10nm *pll_10nm = to_pll_10nm(hw); - if (rate < pll_10nm->phy->cfg->min_pll_rate) - return pll_10nm->phy->cfg->min_pll_rate; - else if (rate > pll_10nm->phy->cfg->max_pll_rate) - return pll_10nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_10nm->phy->cfg->min_pll_rate, pll_10nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_10nm_vco = { - .round_rate = dsi_pll_10nm_clk_round_rate, + .determine_rate = dsi_pll_10nm_clk_determine_rate, .set_rate = dsi_pll_10nm_vco_set_rate, .recalc_rate = dsi_pll_10nm_vco_recalc_rate, .prepare = dsi_pll_10nm_vco_prepare, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index 3a1c8ece6657..fdefcbd9c284 100644 --- 
a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -578,21 +578,19 @@ static void dsi_pll_14nm_vco_unprepare(struct clk_hw *hw) pll_14nm->phy->pll_on = false; } -static long dsi_pll_14nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_14nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_14nm *pll_14nm = to_pll_14nm(hw); - if (rate < pll_14nm->phy->cfg->min_pll_rate) - return pll_14nm->phy->cfg->min_pll_rate; - else if (rate > pll_14nm->phy->cfg->max_pll_rate) - return pll_14nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_14nm->phy->cfg->min_pll_rate, pll_14nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_14nm_vco = { - .round_rate = dsi_pll_14nm_clk_round_rate, + .determine_rate = dsi_pll_14nm_clk_determine_rate, .set_rate = dsi_pll_14nm_vco_set_rate, .recalc_rate = dsi_pll_14nm_vco_recalc_rate, .prepare = dsi_pll_14nm_vco_prepare, @@ -622,18 +620,20 @@ static unsigned long dsi_pll_14nm_postdiv_recalc_rate(struct clk_hw *hw, postdiv->flags, width); } -static long dsi_pll_14nm_postdiv_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *prate) +static int dsi_pll_14nm_postdiv_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_14nm_postdiv *postdiv = to_pll_14nm_postdiv(hw); struct dsi_pll_14nm *pll_14nm = postdiv->pll; - DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, rate); + DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, req->rate); - return divider_round_rate(hw, rate, prate, NULL, - postdiv->width, - postdiv->flags); + req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate, + NULL, + postdiv->width, + postdiv->flags); + + return 0; } static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, @@ -680,7 +680,7 @@ static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, static const struct clk_ops clk_ops_dsi_pll_14nm_postdiv = { .recalc_rate = dsi_pll_14nm_postdiv_recalc_rate, - .round_rate = dsi_pll_14nm_postdiv_round_rate, + .determine_rate = dsi_pll_14nm_postdiv_determine_rate, .set_rate = dsi_pll_14nm_postdiv_set_rate, }; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c index 90348a2af3e9..d00e415b9a99 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c @@ -533,21 +533,20 @@ static void dsi_pll_28nm_vco_unprepare(struct clk_hw *hw) pll_28nm->phy->pll_on = false; } -static long dsi_pll_28nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_28nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_28nm *pll_28nm = to_pll_28nm(hw); - if (rate < pll_28nm->phy->cfg->min_pll_rate) - return pll_28nm->phy->cfg->min_pll_rate; - else if (rate > pll_28nm->phy->cfg->max_pll_rate) - return pll_28nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_28nm->phy->cfg->min_pll_rate, + pll_28nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_28nm_vco_hpm = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_hpm, @@ 
-556,7 +555,7 @@ static const struct clk_ops clk_ops_dsi_pll_28nm_vco_hpm = { }; static const struct clk_ops clk_ops_dsi_pll_28nm_vco_lp = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_lp, @@ -565,7 +564,7 @@ static const struct clk_ops clk_ops_dsi_pll_28nm_vco_lp = { }; static const struct clk_ops clk_ops_dsi_pll_28nm_vco_8226 = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_8226, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index f3643320ff2f..8dcce9581dc3 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -231,21 +231,19 @@ static void dsi_pll_28nm_vco_unprepare(struct clk_hw *hw) pll_28nm->phy->pll_on = false; } -static long dsi_pll_28nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_28nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_28nm *pll_28nm = to_pll_28nm(hw); - if (rate < pll_28nm->phy->cfg->min_pll_rate) - return pll_28nm->phy->cfg->min_pll_rate; - else if (rate > pll_28nm->phy->cfg->max_pll_rate) - return pll_28nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_28nm->phy->cfg->min_pll_rate, pll_28nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_28nm_vco = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare, @@ -296,18 +294,20 @@ static unsigned int get_vco_mul_factor(unsigned long byte_clk_rate) return 8; } -static long clk_bytediv_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int clk_bytediv_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { unsigned long best_parent; unsigned int factor; - factor = get_vco_mul_factor(rate); + factor = get_vco_mul_factor(req->rate); + + best_parent = req->rate * factor; + req->best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent); - best_parent = rate * factor; - *prate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent); + req->rate = req->best_parent_rate / factor; - return *prate / factor; + return 0; } static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate, @@ -328,7 +328,7 @@ static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate, /* Our special byte clock divider ops */ static const struct clk_ops clk_bytediv_ops = { - .round_rate = clk_bytediv_round_rate, + .determine_rate = clk_bytediv_determine_rate, .set_rate = clk_bytediv_set_rate, .recalc_rate = clk_bytediv_recalc_rate, }; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 8c98f91a5930..32f06edd21a9 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -90,6 +90,13 @@ struct dsi_pll_7nm { /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG1 register */ spinlock_t pclk_mux_lock; + /* + * protects REG_DSI_7nm_PHY_CMN_CTRL_0 register and pll_enable_cnt + * member 
+ */ + spinlock_t pll_enable_lock; + int pll_enable_cnt; + struct pll_7nm_cached_state cached_state; struct dsi_pll_7nm *slave; @@ -103,6 +110,9 @@ struct dsi_pll_7nm { */ static struct dsi_pll_7nm *pll_7nm_list[DSI_MAX]; +static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll); +static void dsi_pll_disable_pll_bias(struct dsi_pll_7nm *pll); + static void dsi_pll_setup_config(struct dsi_pll_config *config) { config->ssc_freq = 31500; @@ -340,6 +350,7 @@ static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate, struct dsi_pll_7nm *pll_7nm = to_pll_7nm(hw); struct dsi_pll_config config; + dsi_pll_enable_pll_bias(pll_7nm); DBG("DSI PLL%d rate=%lu, parent's=%lu", pll_7nm->phy->id, rate, parent_rate); @@ -357,6 +368,7 @@ static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate, dsi_pll_ssc_commit(pll_7nm, &config); + dsi_pll_disable_pll_bias(pll_7nm); /* flush, ensure all register writes are done*/ wmb(); @@ -385,19 +397,47 @@ static int dsi_pll_7nm_lock_status(struct dsi_pll_7nm *pll) static void dsi_pll_disable_pll_bias(struct dsi_pll_7nm *pll) { - u32 data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + unsigned long flags; + u32 data; + spin_lock_irqsave(&pll->pll_enable_lock, flags); + --pll->pll_enable_cnt; + if (pll->pll_enable_cnt < 0) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + DRM_DEV_ERROR_RATELIMITED(&pll->phy->pdev->dev, + "bug: imbalance in disabling PLL bias\n"); + return; + } else if (pll->pll_enable_cnt > 0) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + return; + } /* else: == 0 */ + + data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + data &= ~DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; writel(0, pll->phy->pll_base + REG_DSI_7nm_PHY_PLL_SYSTEM_MUXES); - writel(data & ~BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); ndelay(250); } static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll) { - u32 data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + unsigned long flags; + u32 data; + + spin_lock_irqsave(&pll->pll_enable_lock, flags); + if (pll->pll_enable_cnt++) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + WARN_ON(pll->pll_enable_cnt == INT_MAX); + return; + } + + data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + data |= DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); - writel(data | BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); writel(0xc0, pll->phy->pll_base + REG_DSI_7nm_PHY_PLL_SYSTEM_MUXES); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); ndelay(250); } @@ -491,6 +531,10 @@ static int dsi_pll_7nm_vco_prepare(struct clk_hw *hw) if (pll_7nm->slave) dsi_pll_enable_global_clk(pll_7nm->slave); + writel(0x1, pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_RBUF_CTRL); + if (pll_7nm->slave) + writel(0x1, pll_7nm->slave->phy->base + REG_DSI_7nm_PHY_CMN_RBUF_CTRL); + error: return rc; } @@ -534,6 +578,7 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw, u32 dec; u64 pll_freq, tmp64; + dsi_pll_enable_pll_bias(pll_7nm); dec = readl(base + REG_DSI_7nm_PHY_PLL_DECIMAL_DIV_START_1); dec &= 0xff; @@ -558,24 +603,24 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw, DBG("DSI PLL%d returning vco rate = %lu, dec = %x, frac = %x", pll_7nm->phy->id, (unsigned long)vco_rate, dec, frac); + dsi_pll_disable_pll_bias(pll_7nm); + return (unsigned long)vco_rate; } 
-static long dsi_pll_7nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_7nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(hw); - if (rate < pll_7nm->phy->cfg->min_pll_rate) - return pll_7nm->phy->cfg->min_pll_rate; - else if (rate > pll_7nm->phy->cfg->max_pll_rate) - return pll_7nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_7nm->phy->cfg->min_pll_rate, pll_7nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_7nm_vco = { - .round_rate = dsi_pll_7nm_clk_round_rate, + .determine_rate = dsi_pll_7nm_clk_determine_rate, .set_rate = dsi_pll_7nm_vco_set_rate, .recalc_rate = dsi_pll_7nm_vco_recalc_rate, .prepare = dsi_pll_7nm_vco_prepare, @@ -593,6 +638,7 @@ static void dsi_7nm_pll_save_state(struct msm_dsi_phy *phy) void __iomem *phy_base = pll_7nm->phy->base; u32 cmn_clk_cfg0, cmn_clk_cfg1; + dsi_pll_enable_pll_bias(pll_7nm); cached->pll_out_div = readl(pll_7nm->phy->pll_base + REG_DSI_7nm_PHY_PLL_PLL_OUTDIV_RATE); cached->pll_out_div &= 0x3; @@ -604,6 +650,7 @@ static void dsi_7nm_pll_save_state(struct msm_dsi_phy *phy) cmn_clk_cfg1 = readl(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); cached->pll_mux = FIELD_GET(DSI_7nm_PHY_CMN_CLK_CFG1_DSICLK_SEL__MASK, cmn_clk_cfg1); + dsi_pll_disable_pll_bias(pll_7nm); DBG("DSI PLL%d outdiv %x bit_clk_div %x pix_clk_div %x pll_mux %x", pll_7nm->phy->id, cached->pll_out_div, cached->bit_clk_div, cached->pix_clk_div, cached->pll_mux); @@ -826,8 +873,10 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) spin_lock_init(&pll_7nm->postdiv_lock); spin_lock_init(&pll_7nm->pclk_mux_lock); + spin_lock_init(&pll_7nm->pll_enable_lock); pll_7nm->phy = phy; + phy->pll_data = pll_7nm; ret = pll_7nm_register(pll_7nm, phy->provided_clocks->hws); if (ret) { @@ -839,6 +888,12 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) /* TODO: Remove this when we have proper display handover support */ msm_dsi_phy_pll_save_state(phy); + /* + * Store also proper vco_current_rate, because its value will be used in + * dsi_7nm_pll_restore_state(). 
+ */ + if (!dsi_pll_7nm_vco_recalc_rate(&pll_7nm->clk_hw, VCO_REF_CLK_RATE)) + pll_7nm->vco_current_rate = pll_7nm->phy->cfg->min_pll_rate; return 0; } @@ -910,8 +965,10 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, u32 const delay_us = 5; u32 const timeout_us = 1000; struct msm_dsi_dphy_timing *timing = &phy->timing; + struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; bool less_than_1500_mhz; + unsigned long flags; u32 vreg_ctrl_0, vreg_ctrl_1, lane_ctrl0; u32 glbl_pemph_ctrl_0; u32 glbl_str_swi_cal_sel_ctrl, glbl_hstx_str_ctrl_0; @@ -1033,9 +1090,13 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, glbl_rescode_bot_ctrl = 0x3c; } + spin_lock_irqsave(&pll->pll_enable_lock, flags); + pll->pll_enable_cnt = 1; /* de-assert digital and pll power down */ - data = BIT(6) | BIT(5); + data = DSI_7nm_PHY_CMN_CTRL_0_DIGTOP_PWRDN_B | + DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); /* Assert PLL core reset */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_PLL_CNTRL); @@ -1148,7 +1209,9 @@ static bool dsi_7nm_set_continuous_clock(struct msm_dsi_phy *phy, bool enable) static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) { + struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; + unsigned long flags; u32 data; DBG(""); @@ -1175,8 +1238,12 @@ static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); writel(0, base + REG_DSI_7nm_PHY_CMN_LANE_CTRL0); + spin_lock_irqsave(&pll->pll_enable_lock, flags); + pll->pll_enable_cnt = 0; /* Turn off all PHY blocks */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + /* make sure phy is turned off */ wmb(); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index 8c8d80b59573..36e928b0fd5a 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -629,16 +629,12 @@ static int hdmi_8996_pll_prepare(struct clk_hw *hw) return 0; } -static long hdmi_8996_pll_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *parent_rate) +static int hdmi_8996_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - if (rate < HDMI_PCLK_MIN_FREQ) - return HDMI_PCLK_MIN_FREQ; - else if (rate > HDMI_PCLK_MAX_FREQ) - return HDMI_PCLK_MAX_FREQ; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, HDMI_PCLK_MIN_FREQ, HDMI_PCLK_MAX_FREQ); + + return 0; } static unsigned long hdmi_8996_pll_recalc_rate(struct clk_hw *hw, @@ -684,7 +680,7 @@ static int hdmi_8996_pll_is_enabled(struct clk_hw *hw) static const struct clk_ops hdmi_8996_pll_ops = { .set_rate = hdmi_8996_pll_set_clk_rate, - .round_rate = hdmi_8996_pll_round_rate, + .determine_rate = hdmi_8996_pll_determine_rate, .recalc_rate = hdmi_8996_pll_recalc_rate, .prepare = hdmi_8996_pll_prepare, .unprepare = hdmi_8996_pll_unprepare, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c index 33bb48ae58a2..a86ff3706369 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c @@ -646,16 +646,12 @@ static int hdmi_8998_pll_prepare(struct clk_hw *hw) return 0; } -static long hdmi_8998_pll_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *parent_rate) +static int hdmi_8998_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - if 
(rate < HDMI_PCLK_MIN_FREQ) - return HDMI_PCLK_MIN_FREQ; - else if (rate > HDMI_PCLK_MAX_FREQ) - return HDMI_PCLK_MAX_FREQ; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, HDMI_PCLK_MIN_FREQ, HDMI_PCLK_MAX_FREQ); + + return 0; } static unsigned long hdmi_8998_pll_recalc_rate(struct clk_hw *hw, @@ -688,7 +684,7 @@ static int hdmi_8998_pll_is_enabled(struct clk_hw *hw) static const struct clk_ops hdmi_8998_pll_ops = { .set_rate = hdmi_8998_pll_set_clk_rate, - .round_rate = hdmi_8998_pll_round_rate, + .determine_rate = hdmi_8998_pll_determine_rate, .recalc_rate = hdmi_8998_pll_recalc_rate, .prepare = hdmi_8998_pll_prepare, .unprepare = hdmi_8998_pll_unprepare, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c index 83c8781fcc3f..6ba6bbdb7e05 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c @@ -373,12 +373,14 @@ static unsigned long hdmi_pll_recalc_rate(struct clk_hw *hw, return pll->pixclk; } -static long hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) +static int hdmi_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - const struct pll_rate *pll_rate = find_rate(rate); + const struct pll_rate *pll_rate = find_rate(req->rate); + + req->rate = pll_rate->rate; - return pll_rate->rate; + return 0; } static int hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, @@ -402,7 +404,7 @@ static const struct clk_ops hdmi_pll_ops = { .enable = hdmi_pll_enable, .disable = hdmi_pll_disable, .recalc_rate = hdmi_pll_recalc_rate, - .round_rate = hdmi_pll_round_rate, + .determine_rate = hdmi_pll_determine_rate, .set_rate = hdmi_pll_set_rate, }; diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index bbda865addae..97dc70876442 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -325,25 +325,28 @@ static struct drm_info_list msm_debugfs_list[] = { static int late_init_minor(struct drm_minor *minor) { - struct drm_device *dev = minor->dev; - struct msm_drm_private *priv = dev->dev_private; + struct drm_device *dev; + struct msm_drm_private *priv; int ret; if (!minor) return 0; + dev = minor->dev; + priv = dev->dev_private; + if (!priv->gpu_pdev) return 0; ret = msm_rd_debugfs_init(minor); if (ret) { - DRM_DEV_ERROR(minor->dev->dev, "could not install rd debugfs\n"); + DRM_DEV_ERROR(dev->dev, "could not install rd debugfs\n"); return ret; } ret = msm_perf_debugfs_init(minor); if (ret) { - DRM_DEV_ERROR(minor->dev->dev, "could not install perf debugfs\n"); + DRM_DEV_ERROR(dev->dev, "could not install perf debugfs\n"); return ret; } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9dcc7a596a11..7e977fec4100 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -826,6 +826,7 @@ static const struct file_operations fops = { #define DRIVER_FEATURES_KMS ( \ DRIVER_GEM | \ + DRIVER_GEM_GPUVA | \ DRIVER_ATOMIC | \ DRIVER_MODESET | \ 0 ) diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 985db9febd98..6d847d593f1a 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -229,7 +229,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc); int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu); void msm_unregister_mmu(struct drm_device *dev, struct msm_mmu *mmu); -struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev); +struct drm_gpuvm 
*msm_kms_init_vm(struct drm_device *dev, struct device *mdss_dev); bool msm_use_mmu(struct drm_device *dev); int msm_ioctl_gem_submit(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 7ff994d4f91a..07d8cdd6bb2e 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -95,7 +95,6 @@ void msm_gem_vma_get(struct drm_gem_object *obj) void msm_gem_vma_put(struct drm_gem_object *obj) { struct msm_drm_private *priv = obj->dev->dev_private; - struct drm_exec exec; if (atomic_dec_return(&to_msm_bo(obj)->vma_ref)) return; @@ -103,9 +102,13 @@ void msm_gem_vma_put(struct drm_gem_object *obj) if (!priv->kms) return; +#ifdef CONFIG_DRM_MSM_KMS + struct drm_exec exec; + msm_gem_lock_vm_and_obj(&exec, obj, priv->kms->vm); put_iova_spaces(obj, priv->kms->vm, true, "vma_put"); drm_exec_fini(&exec); /* drop locks */ +#endif } /* @@ -188,7 +191,7 @@ static struct page **get_pages(struct drm_gem_object *obj) if (!msm_obj->pages) { struct drm_device *dev = obj->dev; struct page **p; - int npages = obj->size >> PAGE_SHIFT; + size_t npages = obj->size >> PAGE_SHIFT; p = drm_gem_get_pages(obj); @@ -663,9 +666,13 @@ int msm_gem_set_iova(struct drm_gem_object *obj, static bool is_kms_vm(struct drm_gpuvm *vm) { +#ifdef CONFIG_DRM_MSM_KMS struct msm_drm_private *priv = vm->drm->dev_private; return priv->kms && (priv->kms->vm == vm); +#else + return false; +#endif } /* @@ -1113,10 +1120,12 @@ static void msm_gem_free_object(struct drm_gem_object *obj) put_pages(obj); } - if (msm_obj->flags & MSM_BO_NO_SHARE) { + if (obj->resv != &obj->_resv) { struct drm_gem_object *r_obj = container_of(obj->resv, struct drm_gem_object, _resv); + WARN_ON(!(msm_obj->flags & MSM_BO_NO_SHARE)); + /* Drop reference we hold to shared resv obj: */ drm_gem_object_put(r_obj); } @@ -1139,7 +1148,7 @@ static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct /* convenience method to construct a GEM buffer object, and userspace handle */ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, - uint32_t size, uint32_t flags, uint32_t *handle, + size_t size, uint32_t flags, uint32_t *handle, char *name) { struct drm_gem_object *obj; @@ -1205,9 +1214,8 @@ static const struct drm_gem_object_funcs msm_gem_object_funcs = { .vm_ops = &vm_ops, }; -static int msm_gem_new_impl(struct drm_device *dev, - uint32_t size, uint32_t flags, - struct drm_gem_object **obj) +static int msm_gem_new_impl(struct drm_device *dev, uint32_t flags, + struct drm_gem_object **obj) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -1241,7 +1249,7 @@ static int msm_gem_new_impl(struct drm_device *dev, return 0; } -struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags) +struct drm_gem_object *msm_gem_new(struct drm_device *dev, size_t size, uint32_t flags) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -1256,7 +1264,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32 if (size == 0) return ERR_PTR(-EINVAL); - ret = msm_gem_new_impl(dev, size, flags, &obj); + ret = msm_gem_new_impl(dev, flags, &obj); if (ret) return ERR_PTR(ret); @@ -1296,12 +1304,12 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; struct drm_gem_object *obj; - uint32_t size; - int ret, npages; + size_t size, npages; + int ret; size = 
PAGE_ALIGN(dmabuf->size); - ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj); + ret = msm_gem_new_impl(dev, MSM_BO_WC, &obj); if (ret) return ERR_PTR(ret); @@ -1344,7 +1352,7 @@ fail: return ERR_PTR(ret); } -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, +void *msm_gem_kernel_new(struct drm_device *dev, size_t size, uint32_t flags, struct drm_gpuvm *vm, struct drm_gem_object **bo, uint64_t *iova) { diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 88239da1cd72..a4cf31853c50 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -100,7 +100,7 @@ struct msm_gem_vm { * * Only used for kernel managed VMs, unused for user managed VMs. * - * Protected by @mm_lock. + * Protected by vm lock. See msm_gem_lock_vm_and_obj(), for ex. */ struct drm_mm mm; @@ -297,10 +297,10 @@ bool msm_gem_active(struct drm_gem_object *obj); int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); int msm_gem_cpu_fini(struct drm_gem_object *obj); int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, - uint32_t size, uint32_t flags, uint32_t *handle, char *name); + size_t size, uint32_t flags, uint32_t *handle, char *name); struct drm_gem_object *msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags); -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, + size_t size, uint32_t flags); +void *msm_gem_kernel_new(struct drm_device *dev, size_t size, uint32_t flags, struct drm_gpuvm *vm, struct drm_gem_object **bo, uint64_t *iova); void msm_gem_kernel_put(struct drm_gem_object *bo, struct drm_gpuvm *vm); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index c0a33ac839cb..036d34c674d9 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -15,7 +15,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); - int npages = obj->size >> PAGE_SHIFT; + size_t npages = obj->size >> PAGE_SHIFT; if (msm_obj->flags & MSM_BO_NO_SHARE) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5f8e939a5906..3ab3b27134f9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -271,32 +271,37 @@ out: return ret; } -/* This is where we make sure all the bo's are reserved and pin'd: */ -static int submit_lock_objects(struct msm_gem_submit *submit) +static int submit_lock_objects_vmbind(struct msm_gem_submit *submit) { - unsigned flags = DRM_EXEC_INTERRUPTIBLE_WAIT; + unsigned flags = DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES; struct drm_exec *exec = &submit->exec; - int ret; + int ret = 0; - if (msm_context_is_vmbind(submit->queue->ctx)) { - flags |= DRM_EXEC_IGNORE_DUPLICATES; + drm_exec_init(&submit->exec, flags, submit->nr_bos); - drm_exec_init(&submit->exec, flags, submit->nr_bos); + drm_exec_until_all_locked (&submit->exec) { + ret = drm_gpuvm_prepare_vm(submit->vm, exec, 1); + drm_exec_retry_on_contention(exec); + if (ret) + break; - drm_exec_until_all_locked (&submit->exec) { - ret = drm_gpuvm_prepare_vm(submit->vm, exec, 1); - drm_exec_retry_on_contention(exec); - if (ret) - return ret; + ret = drm_gpuvm_prepare_objects(submit->vm, exec, 1); + drm_exec_retry_on_contention(exec); + if (ret) + break; + } - ret = drm_gpuvm_prepare_objects(submit->vm, exec, 1); - drm_exec_retry_on_contention(exec); - if 
(ret) - return ret; - } + return ret; +} - return 0; - } +/* This is where we make sure all the bo's are reserved and pin'd: */ +static int submit_lock_objects(struct msm_gem_submit *submit) +{ + unsigned flags = DRM_EXEC_INTERRUPTIBLE_WAIT; + int ret = 0; + + if (msm_context_is_vmbind(submit->queue->ctx)) + return submit_lock_objects_vmbind(submit); drm_exec_init(&submit->exec, flags, submit->nr_bos); @@ -305,17 +310,17 @@ static int submit_lock_objects(struct msm_gem_submit *submit) drm_gpuvm_resv_obj(submit->vm)); drm_exec_retry_on_contention(&submit->exec); if (ret) - return ret; + break; for (unsigned i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; ret = drm_exec_prepare_obj(&submit->exec, obj, 1); drm_exec_retry_on_contention(&submit->exec); if (ret) - return ret; + break; } } - return 0; + return ret; } static int submit_fence_sync(struct msm_gem_submit *submit) @@ -514,14 +519,15 @@ out: */ static void submit_cleanup(struct msm_gem_submit *submit, bool error) { + if (error) + submit_unpin_objects(submit); + if (submit->exec.objects) drm_exec_fini(&submit->exec); - if (error) { - submit_unpin_objects(submit); - /* job wasn't enqueued to scheduler, so early retirement: */ + /* if job wasn't enqueued to scheduler, early retirement: */ + if (error) msm_submit_retire(submit); - } } void msm_submit_retire(struct msm_gem_submit *submit) @@ -769,12 +775,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) { sync_file = sync_file_create(submit->user_fence); - if (!sync_file) { + if (!sync_file) ret = -ENOMEM; - } else { - fd_install(out_fence_fd, sync_file->file); - args->fence_fd = out_fence_fd; - } } if (ret) @@ -812,10 +814,14 @@ out: out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) { - put_unused_fd(out_fence_fd); + if (ret) { + if (out_fence_fd >= 0) + put_unused_fd(out_fence_fd); if (sync_file) fput(sync_file->file); + } else if (sync_file) { + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; } if (!IS_ERR_OR_NULL(submit)) { diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 210604181c05..8316af1723c2 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -319,13 +319,10 @@ msm_gem_vma_map(struct drm_gpuva *vma, int prot, struct sg_table *sgt) mutex_lock(&vm->mmu_lock); /* - * NOTE: iommu/io-pgtable can allocate pages, so we cannot hold + * NOTE: if not using pgtable preallocation, we cannot hold * a lock across map/unmap which is also used in the job_run() * path, as this can cause deadlock in job_run() vs shrinker/ * reclaim. - * - * Revisit this if we can come up with a scheme to pre-alloc pages - * for the pgtable in map/unmap ops. 
*/ ret = vm_map_op(vm, &(struct msm_vm_map_op){ .iova = vma->va.addr, @@ -371,12 +368,6 @@ struct drm_gpuva * msm_gem_vma_new(struct drm_gpuvm *gpuvm, struct drm_gem_object *obj, u64 offset, u64 range_start, u64 range_end) { - struct drm_gpuva_op_map op_map = { - .va.addr = range_start, - .va.range = range_end - range_start, - .gem.obj = obj, - .gem.offset = offset, - }; struct msm_gem_vm *vm = to_msm_vm(gpuvm); struct drm_gpuvm_bo *vm_bo; struct msm_gem_vma *vma; @@ -405,6 +396,13 @@ msm_gem_vma_new(struct drm_gpuvm *gpuvm, struct drm_gem_object *obj, if (obj) GEM_WARN_ON((range_end - range_start) > obj->size); + struct drm_gpuva_op_map op_map = { + .va.addr = range_start, + .va.range = range_end - range_start, + .gem.obj = obj, + .gem.offset = offset, + }; + drm_gpuva_init_from_op(&vma->base, &op_map); vma->mapped = false; @@ -460,6 +458,8 @@ msm_gem_vm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) struct op_arg { unsigned flags; struct msm_vm_bind_job *job; + const struct msm_vm_bind_op *op; + bool kept; }; static void @@ -481,14 +481,18 @@ vma_from_op(struct op_arg *arg, struct drm_gpuva_op_map *op) } static int -msm_gem_vm_sm_step_map(struct drm_gpuva_op *op, void *arg) +msm_gem_vm_sm_step_map(struct drm_gpuva_op *op, void *_arg) { - struct msm_vm_bind_job *job = ((struct op_arg *)arg)->job; + struct op_arg *arg = _arg; + struct msm_vm_bind_job *job = arg->job; struct drm_gem_object *obj = op->map.gem.obj; struct drm_gpuva *vma; struct sg_table *sgt; unsigned prot; + if (arg->kept) + return 0; + vma = vma_from_op(arg, &op->map); if (WARN_ON(IS_ERR(vma))) return PTR_ERR(vma); @@ -608,15 +612,41 @@ msm_gem_vm_sm_step_remap(struct drm_gpuva_op *op, void *arg) } static int -msm_gem_vm_sm_step_unmap(struct drm_gpuva_op *op, void *arg) +msm_gem_vm_sm_step_unmap(struct drm_gpuva_op *op, void *_arg) { - struct msm_vm_bind_job *job = ((struct op_arg *)arg)->job; + struct op_arg *arg = _arg; + struct msm_vm_bind_job *job = arg->job; struct drm_gpuva *vma = op->unmap.va; struct msm_gem_vma *msm_vma = to_msm_vma(vma); vm_dbg("%p:%p:%p: %016llx %016llx", vma->vm, vma, vma->gem.obj, vma->va.addr, vma->va.range); + /* + * Detect in-place remap. Turnip does this to change the vma flags, + * in particular MSM_VMA_DUMP. In this case we want to avoid actually + * touching the page tables, as that would require synchronization + * against SUBMIT jobs running on the GPU. 
+ */ + if (op->unmap.keep && + (arg->op->op == MSM_VM_BIND_OP_MAP) && + (vma->gem.obj == arg->op->obj) && + (vma->gem.offset == arg->op->obj_offset) && + (vma->va.addr == arg->op->iova) && + (vma->va.range == arg->op->range)) { + /* We are only expecting a single in-place unmap+map cb pair: */ + WARN_ON(arg->kept); + + /* Leave the existing VMA in place, but signal that to the map cb: */ + arg->kept = true; + + /* Only flags are changing, so update that in-place: */ + unsigned orig_flags = vma->flags & (DRM_GPUVA_USERBITS - 1); + vma->flags = orig_flags | arg->flags; + + return 0; + } + if (!msm_vma->mapped) goto out_close; @@ -1000,6 +1030,7 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args struct drm_device *dev = job->vm->drm; int ret = 0; int cnt = 0; + int i = -1; if (args->nr_ops == 1) { /* Single op case, the op is inlined: */ @@ -1033,11 +1064,12 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args spin_lock(&file->table_lock); - for (unsigned i = 0; i < args->nr_ops; i++) { + for (i = 0; i < args->nr_ops; i++) { + struct msm_vm_bind_op *op = &job->ops[i]; struct drm_gem_object *obj; - if (!job->ops[i].handle) { - job->ops[i].obj = NULL; + if (!op->handle) { + op->obj = NULL; continue; } @@ -1045,16 +1077,22 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args * normally use drm_gem_object_lookup(), but for bulk lookup * all under single table_lock just hit object_idr directly: */ - obj = idr_find(&file->object_idr, job->ops[i].handle); + obj = idr_find(&file->object_idr, op->handle); if (!obj) { - ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", job->ops[i].handle, i); + ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", op->handle, i); goto out_unlock; } drm_gem_object_get(obj); - job->ops[i].obj = obj; + op->obj = obj; cnt++; + + if ((op->range + op->obj_offset) > obj->size) { + ret = UERR(EINVAL, dev, "invalid range: %016llx + %016llx > %016zx\n", + op->range, op->obj_offset, obj->size); + goto out_unlock; + } } *nr_bos = cnt; @@ -1062,6 +1100,17 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args out_unlock: spin_unlock(&file->table_lock); + if (ret) { + for (; i >= 0; i--) { + struct msm_vm_bind_op *op = &job->ops[i]; + + if (!op->obj) + continue; + + drm_gem_object_put(op->obj); + op->obj = NULL; + } + } out: return ret; } @@ -1283,6 +1332,7 @@ vm_bind_job_prepare(struct msm_vm_bind_job *job) const struct msm_vm_bind_op *op = &job->ops[i]; struct op_arg arg = { .job = job, + .op = op, }; switch (op->op) { @@ -1479,12 +1529,8 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file) if (args->flags & MSM_VM_BIND_FENCE_FD_OUT) { sync_file = sync_file_create(job->fence); - if (!sync_file) { + if (!sync_file) ret = -ENOMEM; - } else { - fd_install(out_fence_fd, sync_file->file); - args->fence_fd = out_fence_fd; - } } if (ret) @@ -1513,10 +1559,14 @@ out: out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) { - put_unused_fd(out_fence_fd); + if (ret) { + if (out_fence_fd >= 0) + put_unused_fd(out_fence_fd); if (sync_file) fput(sync_file->file); + } else if (sync_file) { + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; } if (!IS_ERR_OR_NULL(job)) { diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index c317b25a8162..17759abc46d7 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -304,7 +304,7 
@@ static void crashstate_get_bos(struct msm_gpu_state *state, struct msm_gem_submi sizeof(struct msm_gpu_state_bo), GFP_KERNEL); for (int i = 0; state->bos && i < submit->nr_bos; i++) { - struct drm_gem_object *obj = submit->bos[i].obj;; + struct drm_gem_object *obj = submit->bos[i].obj; bool dump = rd_full || (submit->bos[i].flags & MSM_SUBMIT_BO_DUMP); msm_gem_lock(obj); @@ -465,6 +465,7 @@ static void recover_worker(struct kthread_work *work) struct msm_gem_submit *submit; struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); char *comm = NULL, *cmd = NULL; + struct task_struct *task; int i; mutex_lock(&gpu->lock); @@ -482,16 +483,20 @@ static void recover_worker(struct kthread_work *work) /* Increment the fault counts */ submit->queue->faults++; - if (submit->vm) { + + task = get_pid_task(submit->pid, PIDTYPE_PID); + if (!task) + gpu->global_faults++; + else { struct msm_gem_vm *vm = to_msm_vm(submit->vm); vm->faults++; /* * If userspace has opted-in to VM_BIND (and therefore userspace - * management of the VM), faults mark the VM as unusuable. This + * management of the VM), faults mark the VM as unusable. This * matches vulkan expectations (vulkan is the main target for - * VM_BIND) + * VM_BIND). */ if (!vm->managed) msm_gem_vm_unusable(submit->vm); @@ -553,8 +558,15 @@ static void recover_worker(struct kthread_work *work) unsigned long flags; spin_lock_irqsave(&ring->submit_lock, flags); - list_for_each_entry(submit, &ring->submits, node) + list_for_each_entry(submit, &ring->submits, node) { + /* + * If the submit uses an unusable vm make sure + * we don't actually run it + */ + if (to_msm_vm(submit->vm)->unusable) + submit->nr_cmds = 0; gpu->funcs->submit(gpu, submit); + } spin_unlock_irqrestore(&ring->submit_lock, flags); } } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index b2a96544f92a..a597f2bee30b 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -16,6 +16,7 @@ #include "msm_drv.h" #include "msm_fence.h" +#include "msm_gpu_trace.h" #include "msm_ringbuffer.h" #include "msm_gem.h" @@ -91,6 +92,7 @@ struct msm_gpu_funcs { * for cmdstream that is buffered in this FIFO upstream of the CP fw. */ bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); + void (*sysprof_setup)(struct msm_gpu *gpu); }; /* Additional state for iommu faults: */ @@ -613,16 +615,19 @@ struct msm_gpu_state { static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) { + trace_msm_gpu_regaccess(reg); writel(data, gpu->mmio + (reg << 2)); } static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg) { + trace_msm_gpu_regaccess(reg); return readl(gpu->mmio + (reg << 2)); } static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or) { + trace_msm_gpu_regaccess(reg); msm_rmw(gpu->mmio + (reg << 2), mask, or); } @@ -644,7 +649,9 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg) * when the lo is read, so make sure to read the lo first to trigger * that */ + trace_msm_gpu_regaccess(reg); val = (u64) readl(gpu->mmio + (reg << 2)); + trace_msm_gpu_regaccess(reg+1); val |= ((u64) readl(gpu->mmio + ((reg + 1) << 2)) << 32); return val; @@ -652,8 +659,10 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg) static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val) { + trace_msm_gpu_regaccess(reg); /* Why not a writeq here? 
Read the screed above */ writel(lower_32_bits(val), gpu->mmio + (reg << 2)); + trace_msm_gpu_regaccess(reg+1); writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2)); } diff --git a/drivers/gpu/drm/msm/msm_gpu_trace.h b/drivers/gpu/drm/msm/msm_gpu_trace.h index 781bbe5540bd..5417f8d389a3 100644 --- a/drivers/gpu/drm/msm/msm_gpu_trace.h +++ b/drivers/gpu/drm/msm/msm_gpu_trace.h @@ -219,6 +219,18 @@ TRACE_EVENT(msm_mmu_prealloc_cleanup, TP_printk("count=%u, remaining=%u", __entry->count, __entry->remaining) ); +TRACE_EVENT(msm_gpu_regaccess, + TP_PROTO(u32 offset), + TP_ARGS(offset), + TP_STRUCT__entry( + __field(u32, offset) + ), + TP_fast_assign( + __entry->offset = offset; + ), + TP_printk("offset=0x%x", __entry->offset) +); + #endif #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 55c29f49b788..0e18619f96cb 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -14,7 +14,9 @@ struct msm_iommu { struct msm_mmu base; struct iommu_domain *domain; - atomic_t pagetables; + + struct mutex init_lock; /* protects pagetables counter and prr_page */ + int pagetables; struct page *prr_page; struct kmem_cache *pt_cache; @@ -227,7 +229,8 @@ static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) * If this is the last attached pagetable for the parent, * disable TTBR0 in the arm-smmu driver */ - if (atomic_dec_return(&iommu->pagetables) == 0) { + mutex_lock(&iommu->init_lock); + if (--iommu->pagetables == 0) { adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, NULL); if (adreno_smmu->set_prr_bit) { @@ -236,6 +239,7 @@ static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) iommu->prr_page = NULL; } } + mutex_unlock(&iommu->init_lock); free_io_pgtable_ops(pagetable->pgtbl_ops); kfree(pagetable); @@ -568,9 +572,12 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_m * If this is the first pagetable that we've allocated, send it back to * the arm-smmu driver as a trigger to set up TTBR0 */ - if (atomic_inc_return(&iommu->pagetables) == 1) { + mutex_lock(&iommu->init_lock); + if (iommu->pagetables++ == 0) { ret = adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &ttbr0_cfg); if (ret) { + iommu->pagetables--; + mutex_unlock(&iommu->init_lock); free_io_pgtable_ops(pagetable->pgtbl_ops); kfree(pagetable); return ERR_PTR(ret); @@ -595,6 +602,7 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_m adreno_smmu->set_prr_bit(adreno_smmu->cookie, true); } } + mutex_unlock(&iommu->init_lock); /* Needed later for TLB flush */ pagetable->parent = parent; @@ -713,7 +721,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) int ret; if (!device_iommu_mapped(dev)) - return NULL; + return ERR_PTR(-ENODEV); domain = iommu_paging_domain_alloc(dev); if (IS_ERR(domain)) @@ -730,7 +738,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) iommu->domain = domain; msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); - atomic_set(&iommu->pagetables, 0); + mutex_init(&iommu->init_lock); ret = iommu_attach_device(iommu->domain, dev); if (ret) { @@ -748,7 +756,7 @@ struct msm_mmu *msm_iommu_disp_new(struct device *dev, unsigned long quirks) struct msm_mmu *mmu; mmu = msm_iommu_new(dev, quirks); - if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return mmu; iommu = to_msm_iommu(mmu); @@ -764,11 +772,11 @@ struct msm_mmu *msm_iommu_gpu_new(struct device *dev, struct msm_gpu *gpu, unsig struct msm_mmu *mmu; mmu = msm_iommu_new(dev, 
quirks); - if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return mmu; iommu = to_msm_iommu(mmu); - if (adreno_smmu && adreno_smmu->cookie) { + if (adreno_smmu->cookie) { const struct io_pgtable_cfg *cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); size_t tblsz = get_tblsz(cfg); diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index 6889f1c1e721..6e5e94f5c9a7 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c @@ -177,12 +177,11 @@ static int msm_kms_fault_handler(void *arg, unsigned long iova, int flags, void return -ENOSYS; } -struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev) +struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev, struct device *mdss_dev) { struct drm_gpuvm *vm; struct msm_mmu *mmu; struct device *mdp_dev = dev->dev; - struct device *mdss_dev = mdp_dev->parent; struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; struct device *iommu_dev; @@ -193,18 +192,17 @@ struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev) */ if (device_iommu_mapped(mdp_dev)) iommu_dev = mdp_dev; - else + else if (mdss_dev && device_iommu_mapped(mdss_dev)) iommu_dev = mdss_dev; + else { + drm_info(dev, "no IOMMU, bailing out\n"); + return ERR_PTR(-ENODEV); + } mmu = msm_iommu_disp_new(iommu_dev, 0); if (IS_ERR(mmu)) return ERR_CAST(mmu); - if (!mmu) { - drm_info(dev, "no IOMMU, fallback to phys contig buffers for scanout\n"); - return NULL; - } - vm = msm_gem_vm_create(dev, mmu, "mdp_kms", 0x1000, 0x100000000 - 0x1000, true); if (IS_ERR(vm)) { @@ -275,6 +273,12 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) if (ret) return ret; + ret = msm_disp_snapshot_init(ddev); + if (ret) { + DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); + return ret; + } + ret = priv->kms_init(ddev); if (ret) { DRM_DEV_ERROR(dev, "failed to load kms\n"); @@ -327,10 +331,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) goto err_msm_uninit; } - ret = msm_disp_snapshot_init(ddev); - if (ret) - DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); - drm_mode_config_reset(ddev); return 0; diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 1f5fe7811e01..2d0e3e784c04 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -154,8 +154,7 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss) dev = msm_mdss->dev; - domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), 32, - &msm_mdss_irqdomain_ops, msm_mdss); + domain = irq_domain_create_linear(dev_fwnode(dev), 32, &msm_mdss_irqdomain_ops, msm_mdss); if (!domain) { dev_err(dev, "failed to add irq_domain\n"); return -EINVAL; @@ -423,7 +422,7 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5 if (IS_ERR(msm_mdss->mmio)) return ERR_CAST(msm_mdss->mmio); - dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio); + dev_dbg(&pdev->dev, "mapped mdss address space @%p\n", msm_mdss->mmio); ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss); if (ret) diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 8617a82cd6b3..d53dfad16bde 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -40,6 +40,10 @@ int msm_context_set_sysprof(struct msm_context *ctx, struct msm_gpu *gpu, int sy break; } + /* Some gpu families require additional setup for sysprof */ + if (gpu->funcs->sysprof_setup) + 
gpu->funcs->sysprof_setup(gpu); + ctx->sysprof = sysprof; return 0; diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml index d860fd94feae..9459b6038217 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml @@ -594,10 +594,14 @@ by a particular renderpass/blit. <reg32 offset="0x0600" name="DBGC_CFG_DBGBUS_SEL_A"/> <reg32 offset="0x0601" name="DBGC_CFG_DBGBUS_SEL_B"/> <reg32 offset="0x0602" name="DBGC_CFG_DBGBUS_SEL_C"/> - <reg32 offset="0x0603" name="DBGC_CFG_DBGBUS_SEL_D"> + <reg32 offset="0x0603" name="DBGC_CFG_DBGBUS_SEL_D" variants="A6XX"> <bitfield high="7" low="0" name="PING_INDEX"/> <bitfield high="15" low="8" name="PING_BLK_SEL"/> </reg32> + <reg32 offset="0x0603" name="DBGC_CFG_DBGBUS_SEL_D" variants="A7XX-"> + <bitfield high="7" low="0" name="PING_INDEX"/> + <bitfield high="24" low="16" name="PING_BLK_SEL"/> + </reg32> <reg32 offset="0x0604" name="DBGC_CFG_DBGBUS_CNTLT"> <bitfield high="5" low="0" name="TRACEEN"/> <bitfield high="14" low="12" name="GRANU"/> @@ -810,7 +814,7 @@ by a particular renderpass/blit. <bitfield name="Y" low="16" high="29" type="uint"/> </bitset> - <reg32 offset="0x8000" name="GRAS_CL_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_cl_cntl" inline="yes"> <bitfield name="CLIP_DISABLE" pos="0" type="boolean"/> <bitfield name="ZNEAR_CLIP_DISABLE" pos="1" type="boolean"/> <bitfield name="ZFAR_CLIP_DISABLE" pos="2" type="boolean"/> @@ -822,18 +826,20 @@ by a particular renderpass/blit. <bitfield name="VP_CLIP_CODE_IGNORE" pos="7" type="boolean"/> <bitfield name="VP_XFORM_DISABLE" pos="8" type="boolean"/> <bitfield name="PERSP_DIVISION_DISABLE" pos="9" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x8000" name="GRAS_CL_CNTL" type="a6xx_gras_cl_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_gras_xs_clip_cull_distance" inline="yes"> <bitfield name="CLIP_MASK" low="0" high="7"/> <bitfield name="CULL_MASK" low="8" high="15"/> </bitset> - <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit"/> + <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit" variants="A6XX-A7XX" /> - <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_cl_interp_cntl" inline="yes"> <!-- see also RB_INTERP_CNTL --> <bitfield name="IJ_PERSP_PIXEL" pos="0" type="boolean"/> <bitfield name="IJ_PERSP_CENTROID" pos="1" type="boolean"/> @@ -844,26 +850,69 @@ by a particular renderpass/blit. 
<bitfield name="COORD_MASK" low="6" high="9" type="hex"/> <bitfield name="UNK10" pos="10" type="boolean" variants="A7XX-"/> <bitfield name="UNK11" pos="11" type="boolean" variants="A7XX-"/> - </reg32> - <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" type="a6xx_gras_cl_interp_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_cl_guardband_clip_adj" inline="true"> <bitfield name="HORZ" low="0" high="8" type="uint"/> <bitfield name="VERT" low="10" high="18" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" type="a6xx_gras_cl_guardband_clip_adj" variants="A6XX-A7XX" usage="rp_blit"/> <!-- Something connected to depth-stencil attachment size --> <reg32 offset="0x8007" name="GRAS_UNKNOWN_8007" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x8008" name="GRAS_UNKNOWN_8008" variants="A7XX-" usage="cmd"/> + <!-- the scale/offset is per view, with up to 6 views --> + <bitset name="a6xx_gras_bin_foveat" inline="yes"> + <bitfield name="BINSCALEEN" pos="6" type="boolean"/> + <enum name="a7xx_bin_scale"> + <value value="0" name="NOSCALE"/> + <value value="1" name="SCALE2X"/> + <value value="2" name="SCALE4X"/> + </enum> + <bitfield name="XSCALE_0" low="8" high="9" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_0" low="10" high="11" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_1" low="12" high="13" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_1" low="14" high="15" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_2" low="16" high="17" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_2" low="18" high="19" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_3" low="20" high="21" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_3" low="22" high="23" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_4" low="24" high="25" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_4" low="26" high="27" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_5" low="28" high="29" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_5" low="30" high="31" type="a7xx_bin_scale"/> + </bitset> - <reg32 offset="0x8009" name="GRAS_UNKNOWN_8009" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800a" name="GRAS_UNKNOWN_800A" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800b" name="GRAS_UNKNOWN_800B" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800c" name="GRAS_UNKNOWN_800C" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x8008" name="GRAS_BIN_FOVEAT" type="a6xx_gras_bin_foveat" variants="A7XX" usage="cmd"/> + + <reg32 offset="0x8009" name="GRAS_BIN_FOVEAT_OFFSET_0" variants="A7XX-" usage="cmd"> + <bitfield name="XOFFSET_0" low="0" high="9" shr="2" type="uint"/> + <bitfield name="XOFFSET_1" low="10" high="19" shr="2" type="uint"/> + <bitfield name="XOFFSET_2" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800a" name="GRAS_BIN_FOVEAT_OFFSET_1" variants="A7XX-" usage="cmd"> + <bitfield name="XOFFSET_3" low="0" high="9" shr="2" type="uint"/> + <bitfield name="XOFFSET_4" low="10" high="19" shr="2" type="uint"/> + <bitfield name="XOFFSET_5" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800b" name="GRAS_BIN_FOVEAT_OFFSET_2" variants="A7XX-" usage="cmd"> + <bitfield name="YOFFSET_0" low="0" high="9" shr="2" type="uint"/> + <bitfield name="YOFFSET_1" low="10" high="19" shr="2" type="uint"/> + <bitfield name="YOFFSET_2" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800c" 
name="GRAS_BIN_FOVEAT_OFFSET_3" variants="A7XX-" usage="cmd"> + <bitfield name="YOFFSET_3" low="0" high="9" shr="2" type="uint"/> + <bitfield name="YOFFSET_4" low="10" high="19" shr="2" type="uint"/> + <bitfield name="YOFFSET_5" low="20" high="29" shr="2" type="uint"/> + </reg32> <!-- <reg32 offset="0x80f0" name="GRAS_UNKNOWN_80F0" type="a6xx_reg_xy"/> --> <!-- 0x8006-0x800f invalid --> - <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" usage="rp_blit"> + <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="XOFFSET" type="float"/> <reg32 offset="1" name="XSCALE" type="float"/> <reg32 offset="2" name="YOFFSET" type="float"/> @@ -871,12 +920,13 @@ by a particular renderpass/blit. <reg32 offset="4" name="ZOFFSET" type="float"/> <reg32 offset="5" name="ZSCALE" type="float"/> </array> - <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" usage="rp_blit"> + + <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="MIN" type="float"/> <reg32 offset="1" name="MAX" type="float"/> </array> - <reg32 offset="0x8090" name="GRAS_SU_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_su_cntl" varset="chip"> <bitfield name="CULL_FRONT" pos="0" type="boolean"/> <bitfield name="CULL_BACK" pos="1" type="boolean"/> <bitfield name="FRONT_CW" pos="2" type="boolean"/> @@ -886,39 +936,66 @@ by a particular renderpass/blit. <bitfield name="LINE_MODE" pos="13" type="a5xx_line_mode"/> <bitfield name="UNK15" low="15" high="16"/> <!-- - On gen1 only MULTIVIEW_ENABLE exists. On gen3 we have - the ability to add the view index to either the RT array - index or the viewport index, and it seems that - MULTIVIEW_ENABLE doesn't do anything, instead we need to - set at least one of RENDERTARGETINDEXINCR or - VIEWPORTINDEXINCR to enable multiview. The blob still - sets MULTIVIEW_ENABLE regardless. - TODO: what about gen2 (a640)? + On gen1 only MULTIVIEW_ENABLE exists. On gen3 we have + the ability to add the view index to either the RT array + index or the viewport index, and it seems that + MULTIVIEW_ENABLE doesn't do anything, instead we need to + set at least one of RENDERTARGETINDEXINCR or + VIEWPORTINDEXINCR to enable multiview. The blob still + sets MULTIVIEW_ENABLE regardless. + TODO: what about gen2 (a640)? 
--> <bitfield name="MULTIVIEW_ENABLE" pos="17" type="boolean"/> - <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean"/> - <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean"/> - <bitfield name="UNK20" low="20" high="22"/> - </reg32> - <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" usage="rp_blit"> + <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean" variants="A6XX-A7XX"/> + <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean" variants="A6XX-A7XX"/> + <bitfield name="UNK20" low="20" high="22" variants="A6XX-A7XX"/> + </bitset> + <reg32 offset="0x8090" name="GRAS_SU_CNTL" type="a6xx_gras_su_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_point_minmax" inline="yes"> <bitfield name="MIN" low="0" high="15" type="ufixed" radix="4"/> <bitfield name="MAX" low="16" high="31" type="ufixed" radix="4"/> - </reg32> - <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" usage="rp_blit"/> + </bitset> + + <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" type="a6xx_gras_su_point_minmax" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_depth_cntl" inline="yes"> + <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" variants="A6XX-A7XX" type="a6xx_gras_su_depth_cntl" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_stencil_cntl" inline="yes"> + <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" type="a6xx_gras_su_stencil_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_render_cntl" inline="yes"> + <bitfield name="FS_DISABLE" pos="7" type="boolean"/> + </bitset> + + <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" type="a6xx_gras_su_render_cntl" variants="A7XX" usage="rp_blit"/> + <!-- 0x8093 invalid --> - <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" usage="rp_blit"> + <bitset name="a6xx_depth_plane_cntl" inline="yes"> <bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/> - </reg32> - <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" usage="rp_blit"/> - <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" usage="rp_blit"/> - <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" usage="rp_blit"/> - <!-- duplicates RB_DEPTH_BUFFER_INFO: --> - <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <bitset name="a6xx_depth_buffer_info" inline="yes"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> <bitfield name="UNK3" pos="3"/> - </reg32> + </bitset> + + <!-- duplicates RB_DEPTH_BUFFER_INFO: --> + <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" usage="cmd"> + 
<bitset name="a6xx_gras_su_conservative_ras_cntl" inline="yes"> <bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/> <enum name="a6xx_shift_amount"> <value value="0" name="NO_SHIFT"/> @@ -928,7 +1005,10 @@ by a particular renderpass/blit. <bitfield name="SHIFTAMOUNT" low="1" high="2" type="a6xx_shift_amount"/> <bitfield name="INNERCONSERVATIVERASEN" pos="3" type="boolean"/> <bitfield name="UNK4" low="4" high="5"/> - </reg32> + </bitset> + + <reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_gras_su_conservative_ras_cntl" variants="A6XX-A7XX" usage="cmd"/> + <reg32 offset="0x809a" name="GRAS_SU_PATH_RENDERING_CNTL"> <bitfield name="UNK0" pos="0" type="boolean"/> <bitfield name="LINELENGTHEN" pos="1" type="boolean"/> @@ -938,10 +1018,13 @@ by a particular renderpass/blit. <bitfield name="WRITES_LAYER" pos="0" type="boolean"/> <bitfield name="WRITES_VIEW" pos="1" type="boolean"/> </bitset> - <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <!-- 0x809e/0x809f invalid --> + <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_rast_cntl" inline="yes"> + <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> + </bitset> <enum name="a6xx_sequenced_thread_dist"> <value value="0x0" name="DIST_SCREEN_COORD"/> @@ -989,7 +1072,7 @@ by a particular renderpass/blit. <value value="0x3" name="RB_BT"/> </enum> - <reg32 offset="0x80a0" name="GRAS_SC_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_sc_cntl" inline="yes"> <bitfield name="CCUSINGLECACHELINESIZE" low="0" high="2"/> <bitfield name="SINGLE_PRIM_MODE" low="3" high="4" type="a6xx_single_prim_mode"/> <bitfield name="RASTER_MODE" pos="5" type="a6xx_raster_mode"/> @@ -999,7 +1082,9 @@ by a particular renderpass/blit. <bitfield name="UNK9" pos="9" type="boolean"/> <bitfield name="ROTATION" low="10" high="11" type="uint"/> <bitfield name="EARLYVIZOUTEN" pos="12" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x80a0" name="GRAS_SC_CNTL" type="a6xx_gras_sc_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <enum name="a6xx_render_mode"> <value value="0x0" name="RENDERING_PASS"/> @@ -1020,7 +1105,7 @@ by a particular renderpass/blit. <value value="0x4" name="LRZ_FEEDBACK_LATE_Z"/> </enum> - <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" usage="rp_blit"> + <bitset name="a6xx_bin_cntl" inline="yes"> <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> @@ -1033,18 +1118,25 @@ by a particular renderpass/blit. In sysmem mode GRAS_LRZ_CNTL.LRZ_WRITE is not considered. 
</doc> <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - <bitfield name="UNK27" pos="27"/> - </reg32> + <bitfield name="FORCE_LRZ_DIS" pos="27" type="boolean"/> + </bitset> + + <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" type="a6xx_bin_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_sc_ras_msaa_cntl" inline="yes"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="UNK2" pos="2"/> <bitfield name="UNK3" pos="3"/> - </reg32> - <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" type="a6xx_gras_sc_ras_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_sc_dest_msaa_cntl" inline="yes"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="MSAA_DISABLE" pos="2" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" type="a6xx_gras_sc_dest_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_msaa_sample_pos_cntl" inline="yes"> <bitfield name="UNK0" pos="0"/> @@ -1062,30 +1154,35 @@ by a particular renderpass/blit. <bitfield name="SAMPLE_3_Y" low="28" high="31" radix="4" type="fixed"/> </bitset> - <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/> - <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> - <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> + <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80a7" name="GRAS_UNKNOWN_80A7" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x80a7" name="GRAS_ROTATION_CNTL" variants="A7XX" usage="cmd"/> - <!-- 0x80a7-0x80ae invalid --> - <reg32 offset="0x80af" name="GRAS_UNKNOWN_80AF" pos="0" usage="cmd"/> + <bitset name="a6xx_screen_scissor_cntl" inline="yes"> + <bitfield name="SCISSOR_DISABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x80af" name="GRAS_SC_SCREEN_SCISSOR_CNTL" type="a6xx_screen_scissor_cntl" variants="A6XX-A7XX" pos="0" usage="cmd"/> <bitset name="a6xx_scissor_xy" inline="yes"> <bitfield name="X" low="0" high="15" type="uint"/> <bitfield name="Y" low="16" high="31" type="uint"/> </bitset> - <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" usage="rp_blit"> + + <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" usage="rp_blit"> + + <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" 
type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <enum name="a6xx_fsr_combiner"> <value value="0" name="FSR_COMBINER_OP_KEEP"/> @@ -1095,7 +1192,7 @@ by a particular renderpass/blit. <value value="4" name="FSR_COMBINER_OP_MUL"/> </enum> - <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" variants="A7XX-" usage="rp_blit"> + <bitset name="a6xx_gras_vrs_config"> <bitfield name="PIPELINE_FSR_ENABLE" pos="0" type="boolean"/> <bitfield name="FRAG_SIZE_X" low="1" high="2" type="uint"/> <bitfield name="FRAG_SIZE_Y" low="3" high="4" type="uint"/> @@ -1103,20 +1200,32 @@ by a particular renderpass/blit. <bitfield name="COMBINER_OP_2" low="8" high="10" type="a6xx_fsr_combiner"/> <bitfield name="ATTACHMENT_FSR_ENABLE" pos="13" type="boolean"/> <bitfield name="PRIMITIVE_FSR_ENABLE" pos="20" type="boolean"/> - </reg32> - <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" type="a6xx_gras_vrs_config" variants="A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_info" inline="yes"> <bitfield name="LAYERED" pos="0" type="boolean"/> <bitfield name="TILE_MODE" low="1" high="2" type="a6xx_tile_mode"/> - </reg32> - <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" type="a6xx_gras_quality_buffer_info" variants="A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_dimension" inline="yes"> <bitfield name="WIDTH" low="0" high="15" type="uint"/> <bitfield name="HEIGHT" low="16" high="31" type="uint"/> - </reg32> - <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX-" type="waddress" usage="rp_blit"/> - <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" type="a6xx_gras_quality_buffer_dimension" variants="A7XX" usage="rp_blit"/> + + <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX" type="waddress" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_pitch" inline="yes"> <bitfield name="PITCH" shr="6" low="0" high="7" type="uint"/> <bitfield name="ARRAY_PITCH" shr="6" low="10" high="28" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" type="a6xx_gras_quality_buffer_pitch" variants="A7XX" usage="rp_blit"/> <enum name="a6xx_lrz_dir_status"> <value value="0x1" name="LRZ_DIR_LE"/> @@ -1124,7 +1233,7 @@ by a particular renderpass/blit. <value value="0x3" name="LRZ_DIR_INVALID"/> </enum> - <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_cntl" inline="yes"> <bitfield name="ENABLE" pos="0" type="boolean"/> <doc>LRZ write also disabled for blend/etc.</doc> <bitfield name="LRZ_WRITE" pos="1" type="boolean"/> @@ -1151,26 +1260,36 @@ by a particular renderpass/blit. 
</doc> <bitfield name="DISABLE_ON_WRONG_DIR" pos="9" type="boolean" variants="A6XX"/> <bitfield name="Z_FUNC" low="11" high="13" type="adreno_compare_func" variants="A7XX-"/> - </reg32> + </bitset> + + <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="rp_blit" variants="A6XX-A7XX"/> <enum name="a6xx_fragcoord_sample_mode"> <value value="0" name="FRAGCOORD_CENTER"/> <value value="3" name="FRAGCOORD_SAMPLE"/> </enum> - <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" low="0" high="2" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_ps_input_cntl" inline="yes"> <bitfield name="SAMPLEID" pos="0" type="boolean"/> <bitfield name="FRAGCOORDSAMPLEMODE" low="1" high="2" type="a6xx_fragcoord_sample_mode"/> - </reg32> + </bitset> + + <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" type="a6xx_gras_lrz_ps_input_cntl" usage="rp_blit" variants="A6XX-A7XX"/> - <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_mrt_buffer_info_0" inline="yes"> <bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/> - </reg32> - <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit"/> - <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" type="a6xx_gras_lrz_mrt_buffer_info_0" usage="rp_blit" variants="A6XX-A7XX"/> + + <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit" variants="A6XX-A7XX"/> + + <bitset name="a6xx_gras_lrz_buffer_pitch" inline="yes"> <bitfield name="PITCH" low="0" high="7" shr="5" type="uint"/> <bitfield name="ARRAY_PITCH" low="10" high="28" shr="8" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" type="a6xx_gras_lrz_buffer_pitch" usage="rp_blit" variants="A6XX-A7XX"/> <!-- The LRZ "fast clear" buffer is initialized to zero's by blob, and @@ -1203,7 +1322,6 @@ by a particular renderpass/blit. not. --> <reg64 offset="0x8106" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE" align="64" type="waddress" usage="rp_blit"/> - <!-- 0x8108 invalid --> <reg32 offset="0x8109" name="GRAS_LRZ_PS_SAMPLEFREQ_CNTL" usage="rp_blit"> <bitfield name="PER_SAMP_MODE" pos="0" type="boolean"/> </reg32> @@ -1228,19 +1346,20 @@ by a particular renderpass/blit. <!-- 0x810c-0x810f invalid --> - <reg32 offset="0x8110" name="GRAS_UNKNOWN_8110" low="0" high="1" usage="cmd"/> + <reg32 offset="0x8110" name="GRAS_MODE_CNTL" low="0" high="1" variants="A6XX-A7XX" usage="cmd"/> <!-- A bit tentative but it's a color and it is followed by LRZ_CLEAR --> - <reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX-"/> + <reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX"/> - <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_depth_buffer_info" inline="yes"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> <bitfield name="UNK3" pos="3"/> - </reg32> + </bitset> + + <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" type="a6xx_gras_lrz_depth_buffer_info" variants="A7XX" usage="rp_blit"/> - <!-- Always written together and always equal 09510840 00000a62 --> - <reg32 offset="0x8120" name="GRAS_UNKNOWN_8120" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x8121" name="GRAS_UNKNOWN_8121" variants="A7XX-" usage="cmd"/> + <doc>LUT used to convert quality buffer values to HW shading rate values. 
An array of 4-bit values.</doc> + <array offset="0x8120" name="GRAS_LRZ_QUALITY_LOOKUP_TABLE" variants="A7XX-" stride="1" length="2"/> <!-- 0x8112-0x83ff invalid --> @@ -1265,28 +1384,29 @@ by a particular renderpass/blit. <bitfield name="D24S8" pos="19" type="boolean"/> <!-- some sort of channel mask, disabled channels are set to zero ? --> <bitfield name="MASK" low="20" high="23"/> - <bitfield name="IFMT" low="24" high="28" type="a6xx_2d_ifmt"/> + <bitfield name="IFMT" low="24" high="26" type="a6xx_2d_ifmt"/> + <bitfield name="UNK27" pos="27" type="boolean"/> + <bitfield name="UNK28" pos="28" type="boolean"/> <bitfield name="RASTER_MODE" pos="29" type="a6xx_raster_mode"/> - <bitfield name="UNK30" pos="30" type="boolean" variants="A7XX-"/> + <bitfield name="COPY" pos="30" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_bit_cntl" usage="rp_blit"/> + <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_bit_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <!-- note: the low 8 bits for src coords are valid, probably fixed point it would be a bit weird though, since we subtract 1 from BR coords apparently signed, gallium driver uses negative coords and it works? --> - <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x8407" name="GRAS_2D_UNKNOWN_8407" low="0" high="31"/> <reg32 offset="0x8408" name="GRAS_2D_UNKNOWN_8408" low="0" high="31"/> <reg32 offset="0x8409" name="GRAS_2D_UNKNOWN_8409" low="0" high="31"/> - <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" usage="rp_blit"/> - <!-- 0x840c-0x85ff invalid --> + <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <!-- always 0x880 ? (and 0 in a640/a650 traces?) --> <reg32 offset="0x8600" name="GRAS_DBG_ECO_CNTL" usage="cmd"> @@ -1304,22 +1424,7 @@ by a particular renderpass/blit. 
--> <!-- same as GRAS_BIN_CONTROL, but without bit 27: --> - <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX" usage="rp_blit"> - <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> - <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> - <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> - <bitfield name="FORCE_LRZ_WRITE_DIS" pos="21" type="boolean"/> - <bitfield name="BUFFERS_LOCATION" low="22" high="23" type="a6xx_buffers_location"/> - <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - </reg32> - - <reg32 offset="0x8800" name="RB_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> - <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> - <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> - <bitfield name="FORCE_LRZ_WRITE_DIS" pos="21" type="boolean"/> - <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - </reg32> + <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX-A7XX" type="a6xx_bin_cntl" usage="rp_blit"/> <reg32 offset="0x8801" name="RB_RENDER_CNTL" variants="A6XX" usage="rp_blit"> <bitfield name="CCUSINGLECACHELINESIZE" low="3" high="5"/> @@ -1343,9 +1448,6 @@ by a particular renderpass/blit. <bitfield name="CONSERVATIVERASEN" pos="11" type="boolean"/> <bitfield name="INNERCONSERVATIVERASEN" pos="12" type="boolean"/> </reg32> - <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="FS_DISABLE" pos="7" type="boolean"/> - </reg32> <reg32 offset="0x8802" name="RB_RAS_MSAA_CNTL" usage="rp_blit"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> @@ -1512,9 +1614,7 @@ by a particular renderpass/blit. <bitfield name="SAMPLE_MASK" low="16" high="31"/> </reg32> <!-- 0x8866-0x886f invalid --> - <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" usage="rp_blit"> - <bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/> - </reg32> + <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" usage="rp_blit"/> <reg32 offset="0x8871" name="RB_DEPTH_CNTL" usage="rp_blit"> <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> @@ -1528,14 +1628,9 @@ by a particular renderpass/blit. <bitfield name="Z_READ_ENABLE" pos="6" type="boolean"/> <bitfield name="Z_BOUNDS_ENABLE" pos="7" type="boolean"/> </reg32> - <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" usage="rp_blit"> - <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> - </reg32> + <!-- duplicates GRAS_SU_DEPTH_BUFFER_INFO: --> - <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" usage="rp_blit"> - <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> - <bitfield name="UNK3" low="3" high="4"/> - </reg32> + <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" type="a6xx_depth_buffer_info" usage="rp_blit"/> <!-- first 4 bits duplicates GRAS_SU_DEPTH_BUFFER_INFO --> <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> @@ -1571,9 +1666,7 @@ by a particular renderpass/blit. 
<bitfield name="ZPASS_BF" low="26" high="28" type="adreno_stencil_op"/> <bitfield name="ZFAIL_BF" low="29" high="31" type="adreno_stencil_op"/> </reg32> - <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" usage="rp_blit"> - <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/> - </reg32> + <reg32 offset="0x8881" name="RB_STENCIL_BUFFER_INFO" variants="A6XX" usage="rp_blit"> <bitfield name="SEPARATE_STENCIL" pos="0" type="boolean"/> <bitfield name="UNK1" pos="1" type="boolean"/> @@ -1612,8 +1705,9 @@ by a particular renderpass/blit. <reg32 offset="0x8899" name="RB_UNKNOWN_8899" variants="A7XX-" usage="cmd"/> <!-- 0x8899-0x88bf invalid --> <!-- clamps depth value for depth test/write --> - <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit"/> - <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit"/> + <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit" variants="A6XX-A7XX"/> + <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit" variants="A6XX-A7XX"/> + <!-- 0x88c2-0x88cf invalid--> <reg32 offset="0x88d0" name="RB_RESOLVE_CNTL_0" usage="rp_blit"> <bitfield name="UNK0" low="0" high="12"/> @@ -1622,7 +1716,7 @@ by a particular renderpass/blit. <reg32 offset="0x88d1" name="RB_RESOLVE_CNTL_1" type="a6xx_reg_xy" usage="rp_blit"/> <reg32 offset="0x88d2" name="RB_RESOLVE_CNTL_2" type="a6xx_reg_xy" usage="rp_blit"/> <!-- weird to duplicate other regs from same block?? --> - <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" usage="rp_blit"> + <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" variants="A6XX-A7XX" usage="rp_blit"> <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> </reg32> @@ -1646,10 +1740,13 @@ by a particular renderpass/blit. <!-- array-pitch is size of layer --> <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint" usage="rp_blit"/> <reg64 offset="0x88dc" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" usage="rp_blit"> + + <bitset name="a6xx_flag_buffer_pitch" inline="yes"> <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/> - </reg32> + <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/> + </bitset> + + <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/> <reg32 offset="0x88df" name="RB_RESOLVE_CLEAR_COLOR_DW0" usage="rp_blit"/> <reg32 offset="0x88e0" name="RB_RESOLVE_CLEAR_COLOR_DW1" usage="rp_blit"/> @@ -1722,10 +1819,7 @@ by a particular renderpass/blit. <reg32 offset="0x88f0" name="RB_UNKNOWN_88F0" low="0" high="11" usage="cmd"/> <!-- could be for separate stencil? (or may not be a flag buffer at all) --> <reg64 offset="0x88f1" name="RB_UNK_FLAG_BUFFER_BASE" type="waddress" align="64"/> - <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH"> - <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="23" shr="7" type="uint"/> - </reg32> + <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch"/> <reg32 offset="0x88f4" name="RB_VRS_CONFIG" usage="rp_blit"> <bitfield name="UNK2" pos="2" type="boolean"/> @@ -1733,8 +1827,9 @@ by a particular renderpass/blit. 
<bitfield name="ATTACHMENT_FSR_ENABLE" pos="5" type="boolean"/> <bitfield name="PRIMITIVE_FSR_ENABLE" pos="18" type="boolean"/> </reg32> - <!-- Connected to VK_EXT_fragment_density_map? --> - <reg32 offset="0x88f5" name="RB_UNKNOWN_88F5" variants="A7XX-"/> + <reg32 offset="0x88f5" name="RB_BIN_FOVEAT" variants="A7XX-" usage="cmd"> + <bitfield name="BINSCALEEN" pos="6" type="boolean"/> + </reg32> <!-- 0x88f6-0x88ff invalid --> <reg64 offset="0x8900" name="RB_DEPTH_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> <reg32 offset="0x8902" name="RB_DEPTH_FLAG_BUFFER_PITCH" usage="rp_blit"> @@ -1743,12 +1838,10 @@ by a particular renderpass/blit. <bitfield name="UNK8" low="8" high="10"/> <bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/> </reg32> + <array offset="0x8903" name="RB_COLOR_FLAG_BUFFER" stride="3" length="8" usage="rp_blit"> <reg64 offset="0" name="ADDR" type="waddress" align="64"/> - <reg32 offset="2" name="PITCH"> - <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/> - </reg32> + <reg32 offset="2" name="PITCH" type="a6xx_flag_buffer_pitch"/> </array> <!-- 0x891b-0x8926 invalid --> <doc> @@ -1811,7 +1904,7 @@ by a particular renderpass/blit. <reg64 offset="0x8c1e" name="RB_A2D_DEST_BUFFER_BASE_2" type="waddress" align="64" usage="rp_blit"/> <reg64 offset="0x8c20" name="RB_A2D_DEST_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" low="0" high="7" shr="6" type="uint" usage="rp_blit"/> + <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/> <!-- this is a guess but seems likely (for NV12 with UBWC): --> <reg64 offset="0x8c23" name="RB_A2D_DEST_FLAG_BUFFER_BASE_1" type="waddress" align="64" usage="rp_blit"/> <reg32 offset="0x8c25" name="RB_A2D_DEST_FLAG_BUFFER_PITCH_1" low="0" high="7" shr="6" type="uint" usage="rp_blit"/> @@ -1917,13 +2010,13 @@ by a particular renderpass/blit. 
<bitfield name="CLIP_DIST_03_LOC" low="8" high="15" type="uint"/> <bitfield name="CLIP_DIST_47_LOC" low="16" high="23" type="uint"/> </bitset> - <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> + <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> + <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_vpc_xs_siv_cntl" inline="yes"> <bitfield name="LAYERLOC" low="0" high="7" type="uint"/> @@ -1931,23 +2024,33 @@ by a particular renderpass/blit. <bitfield name="SHADINGRATELOC" low="16" high="23" type="uint" variants="A7XX-"/> </bitset> - <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> + <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + + <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_rast_stream_cntl" inline="yes"> + <!-- which stream to send to GRAS --> + <bitfield name="STREAM" low="0" high="1" type="uint"/> + <!-- discard primitives before rasterization --> + <bitfield name="DISCARD" pos="2" type="boolean"/> + </bitset> - <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> + <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/> + <reg32 
offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/> <reg32 offset="0x9107" name="VPC_UNKNOWN_9107" variants="A6XX" usage="rp_blit"> <!-- this mirrors VPC_RAST_STREAM_CNTL::DISCARD, although it seems it's unused --> <bitfield name="RASTER_DISCARD" pos="0" type="boolean"/> <bitfield name="UNK2" pos="2" type="boolean"/> </reg32> - <reg32 offset="0x9108" name="VPC_RAST_CNTL" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> + <reg32 offset="0x9108" name="VPC_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_pc_cntl" inline="yes"> <bitfield name="PRIMITIVE_RESTART" pos="0" type="boolean"/> <bitfield name="PROVOKING_VTX_LAST" pos="1" type="boolean"/> @@ -1987,10 +2090,10 @@ by a particular renderpass/blit. <bitfield name="VIEWS" low="2" high="6" type="uint"/> </bitset> - <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX" usage="rp_blit"/> <enum name="a6xx_varying_interp_mode"> <value value="0" name="INTERP_SMOOTH"/> @@ -2007,11 +2110,11 @@ by a particular renderpass/blit. </enum> <!-- 0x9109-0x91ff invalid --> - <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" usage="rp_blit"> + <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit"> <doc>Packed array of a6xx_varying_interp_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> - <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE_0" stride="1" length="8" usage="rp_blit"> + <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit"> <doc>Packed array of a6xx_varying_ps_repl_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> @@ -2020,12 +2123,12 @@ by a particular renderpass/blit. <reg32 offset="0x9210" name="VPC_UNKNOWN_9210" low="0" high="31" variants="A6XX" usage="cmd"/> <reg32 offset="0x9211" name="VPC_UNKNOWN_9211" low="0" high="31" variants="A6XX" usage="cmd"/> - <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL_0" stride="1" length="4" usage="rp_blit"> + <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL" stride="1" length="4" variants="A6XX-A7XX" usage="rp_blit"> <!-- one bit per varying component: --> <reg32 offset="0" name="DISABLE"/> </array> - <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" usage="rp_blit"> + <bitset name="a6xx_vpc_so_mapping_wptr" inline="yes"> <!-- Choose which DWORD to write to. There is an array of (4 * 64) DWORD's, dumped in the devcoredump at @@ -2052,20 +2155,25 @@ by a particular renderpass/blit. 
<bitfield name="ADDR" low="0" high="7" type="hex"/> <!-- clear all A_EN and B_EN bits for all DWORD's --> <bitfield name="RESET" pos="16" type="boolean"/> - </reg32> - <!-- special register, write multiple times to load SO program (not readable) --> - <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" usage="rp_blit"> + </bitset> + + <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" type="a6xx_vpc_so_mapping_wptr" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_so_mapping_port" inline="yes"> <bitfield name="A_BUF" low="0" high="1" type="uint"/> <bitfield name="A_OFF" low="2" high="10" shr="2" type="uint"/> <bitfield name="A_EN" pos="11" type="boolean"/> <bitfield name="B_BUF" low="12" high="13" type="uint"/> <bitfield name="B_OFF" low="14" high="22" shr="2" type="uint"/> <bitfield name="B_EN" pos="23" type="boolean"/> - </reg32> + </bitset> + + <!-- special register, write multiple times to load SO program (not readable) --> + <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" type="a6xx_vpc_so_mapping_port" variants="A6XX-A7XX" usage="rp_blit"/> - <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" usage="cmd"/> + <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" variants="A6XX-A7XX" usage="cmd"/> - <array offset="0x921a" name="VPC_SO" stride="7" length="4" usage="cmd"> + <array offset="0x921a" name="VPC_SO" stride="7" length="4" variants="A6XX-A7XX" usage="cmd"> <reg64 offset="0" name="BUFFER_BASE" type="waddress" align="32"/> <reg32 offset="2" name="BUFFER_SIZE" low="2" high="31" shr="2"/> <reg32 offset="3" name="BUFFER_STRIDE" low="0" high="9" shr="2"/> @@ -2073,12 +2181,13 @@ by a particular renderpass/blit. <reg64 offset="5" name="FLUSH_BASE" type="waddress" align="32"/> </array> - <reg32 offset="0x9236" name="VPC_REPLACE_MODE_CNTL" usage="cmd"> + <bitset name="a6xx_vpc_replace_mode_cntl" inline="yes"> <bitfield name="INVERT" pos="0" type="boolean"/> - </reg32> - <!-- 0x9237-0x92ff invalid --> - <!-- always 0x0 ? --> - <reg32 offset="0x9300" name="VPC_UNKNOWN_9300" low="0" high="2" usage="cmd"/> + </bitset> + + <reg32 offset="0x9236" name="VPC_REPLACE_MODE_CNTL" type="a6xx_vpc_replace_mode_cntl" variants="A6XX-A7XX" usage="cmd"/> + + <reg32 offset="0x9300" name="VPC_ROTATION_CNTL" low="0" high="2" variants="A6XX-A7XX" usage="cmd"/> <bitset name="a6xx_vpc_xs_cntl" inline="yes"> <doc> @@ -2097,11 +2206,12 @@ by a particular renderpass/blit. </doc> </bitfield> </bitset> - <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9304" name="VPC_PS_CNTL" usage="rp_blit"> + <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_ps_cntl" inline="yes"> <bitfield name="NUMNONPOSVAR" low="0" high="7" type="uint"/> <!-- for fixed-function (i.e. no GS) gl_PrimitiveID in FS --> <bitfield name="PRIMIDLOC" low="8" high="15" type="uint"/> @@ -2118,9 +2228,11 @@ by a particular renderpass/blit. ViewID through the VS. 
</doc> </bitfield> - </reg32> + </bitset> + + <reg32 offset="0x9304" name="VPC_PS_CNTL" type="a6xx_vpc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9305" name="VPC_SO_CNTL" usage="rp_blit"> + <bitset name="a6xx_vpc_so_cntl" inline="yes"> <!-- It's offset by 1, and 0 means "disabled" --> @@ -2129,22 +2241,28 @@ by a particular renderpass/blit. <bitfield name="BUF2_STREAM" low="6" high="8" type="uint"/> <bitfield name="BUF3_STREAM" low="9" high="11" type="uint"/> <bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/> - </reg32> - <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" usage="rp_blit"> + </bitset> + + <reg32 offset="0x9305" name="VPC_SO_CNTL" type="a6xx_vpc_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_so_override" inline="yes"> <bitfield name="DISABLE" pos="0" type="boolean"/> - </reg32> - <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" variants="A6XX-" usage="rp_blit"> <!-- A702 + A7xx --> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX-" usage="rp_blit"> - <bitfield name="SIZE_GMEM" low="0" high="31"/> - </reg32> - <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX-" usage="rp_blit"> - <bitfield name="BASE_GMEM" low="0" high="31"/> - </reg32> - <reg32 offset="0x9b09" name="PC_ATTR_BUF_GMEM_SIZE" variants="A7XX-" usage="rp_blit"> - <bitfield name="SIZE_GMEM" low="0" high="31"/> - </reg32> + </bitset> + + <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" type="a6xx_so_override" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9807" name="PC_DGEN_SO_OVERRIDE" type="a6xx_so_override" variants="A7XX" usage="rp_blit"/> + + <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="rp_blit"/> + <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX" type="uint" usage="rp_blit"/> + + <reg32 offset="0x9b09" name="PC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="rp_blit"/> + + <reg32 offset="0x930a" name="VPC_UNKNOWN_930A" variants="A7XX"/> + + <reg32 offset="0x960a" name="VPC_FLATSHADE_MODE_CNTL" variants="A7XX"/> <!-- 0x9307-0x95ff invalid --> @@ -2159,52 +2277,62 @@ by a particular renderpass/blit. <!-- TODO: regs from 0x9624-0x963a --> <!-- 0x963b-0x97ff invalid --> - <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" usage="rp_blit"/> + <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" variants="A6XX-A7XX" usage="rp_blit"/> - <!-- always 0x0 ? 
--> - <reg32 offset="0x9801" name="PC_HS_PARAM_1" usage="rp_blit"> + <bitset name="a6xx_pc_hs_param_1" inline="yes"> <bitfield name="SIZE" low="0" high="10" type="uint"/> <bitfield name="UNK13" pos="13"/> - </reg32> + </bitset> - <reg32 offset="0x9802" name="PC_DS_PARAM" usage="rp_blit"> + <reg32 offset="0x9801" name="PC_HS_PARAM_1" type="a6xx_pc_hs_param_1" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_pc_ds_param" inline="yes"> <bitfield name="SPACING" low="0" high="1" type="a6xx_tess_spacing"/> <bitfield name="OUTPUT" low="2" high="3" type="a6xx_tess_output"/> - </reg32> + </bitset> + + <reg32 offset="0x9802" name="PC_DS_PARAM" type="a6xx_pc_ds_param" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" usage="rp_blit"/> - <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" usage="rp_blit"/> + <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x9805" name="PC_POWER_CNTL" low="0" high="2" usage="rp_blit"/> - <reg32 offset="0x9806" name="PC_PS_CNTL" usage="rp_blit"> + <bitset name="a6xx_pc_ps_cntl" inline="yes"> <bitfield name="PRIMITIVEIDEN" pos="0" type="boolean"/> - </reg32> + </bitset> - <!-- New in a6xx gen3+ --> - <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" usage="rp_blit"> + <reg32 offset="0x9806" name="PC_PS_CNTL" type="a6xx_pc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_pc_dgen_so_cntl" inline="yes"> <bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/> - </reg32> + </bitset> - <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL"> + <!-- New in a6xx gen3+ --> + <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" type="a6xx_pc_dgen_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_pc_dgen_su_conservative_ras_cntl" inline="yes"> <bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/> - </reg32> - <!-- 0x980b-0x983f invalid --> + </bitset> + + <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_pc_dgen_su_conservative_ras_cntl" variants="A6XX-A7XX"/> <!-- 0x9840 - 0x9842 are not readable --> - <reg32 offset="0x9840" name="PC_DRAW_INITIATOR"> + <bitset name="a6xx_draw_initiator" inline="yes"> <bitfield name="STATE_ID" low="0" high="7"/> - </reg32> + </bitset> - <reg32 offset="0x9841" name="PC_KERNEL_INITIATOR"> - <bitfield name="STATE_ID" low="0" high="7"/> - </reg32> + <reg32 offset="0x9840" name="PC_DRAW_INITIATOR" type="a6xx_draw_initiator" variants="A6XX-A7XX"/> + <reg32 offset="0x9841" name="PC_KERNEL_INITIATOR" type="a6xx_draw_initiator" variants="A6XX-A7XX"/> - <reg32 offset="0x9842" name="PC_EVENT_INITIATOR"> + <bitset name="a6xx_event_initiator" inline="yes"> <!-- I think only the low bit is actually used? --> <bitfield name="STATE_ID" low="16" high="23"/> <bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/> - </reg32> + </bitset> + + <reg32 offset="0x9842" name="PC_EVENT_INITIATOR" type="a6xx_event_initiator" variants="A6XX-A7XX"/> <!-- 0x9880 written in a lot of places by SQE, same value gets written @@ -2215,45 +2343,21 @@ by a particular renderpass/blit. 
<!-- 0x9843-0x997f invalid --> - <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" variants="A6XX" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - - <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" variants="A6XX" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> - <!-- VPC_RAST_STREAM_CNTL --> - <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" variants="A7XX-" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> - <reg32 offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" variants="A7XX-" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> + <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A7XX" usage="rp_blit"/> <!-- Both are a750+. Probably needed to correctly overlap execution of several draws. --> - <reg32 offset="0x9885" name="PC_HS_BUFFER_SIZE" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x9885" name="PC_HS_BUFFER_SIZE" variants="A7XX" usage="cmd"/> <!-- Blob adds a bit more space {0x10, 0x20, 0x30, 0x40} bytes, but the meaning of this additional space is not known. --> - <reg32 offset="0x9886" name="PC_TF_BUFFER_SIZE" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x9886" name="PC_TF_BUFFER_SIZE" variants="A7XX" usage="cmd"/> <!-- 0x9982-0x9aff invalid --> - <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" usage="rp_blit"/> + <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_pc_xs_cntl" inline="yes"> <doc> @@ -2266,18 +2370,18 @@ by a particular renderpass/blit. 
<bitfield name="LAYER" pos="9" type="boolean"/> <bitfield name="VIEW" pos="10" type="boolean"/> <!-- note: PC_VS_CNTL doesn't have the PRIMITIVE_ID bit --> + <!-- since HS can't output anything, only PRIMITIVE_ID is valid --> <bitfield name="PRIMITIVE_ID" pos="11" type="boolean"/> <bitfield name="CLIP_MASK" low="16" high="23" type="uint"/> <bitfield name="SHADINGRATE" pos="24" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <!-- since HS can't output anything, only PRIMITIVE_ID is valid --> - <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> + <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" usage="rp_blit"/> + <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x9b06" name="PC_PRIMITIVE_CNTL_6" variants="A6XX" usage="rp_blit"> <doc> @@ -2286,9 +2390,9 @@ by a particular renderpass/blit. <bitfield name="STRIDE_IN_VPC" low="0" high="10" type="uint"/> </reg32> - <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" usage="rp_blit"/> + <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <!-- mask of enabled views, doesn't exist on A630 --> - <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" usage="rp_blit"/> + <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A6XX-A7XX" usage="rp_blit"/> <!-- 0x9b09-0x9bff invalid --> <reg32 offset="0x9c00" name="PC_2D_EVENT_CMD"> <!-- special register (but note first 8 bits can be written/read) --> @@ -2299,34 +2403,39 @@ by a particular renderpass/blit. 
<!-- TODO: 0x9e00-0xa000 range incomplete --> <reg32 offset="0x9e00" name="PC_DBG_ECO_CNTL"/> <reg32 offset="0x9e01" name="PC_ADDR_MODE_CNTL" type="a5xx_address_mode"/> - <reg64 offset="0x9e04" name="PC_DMA_BASE"/> - <reg32 offset="0x9e06" name="PC_DMA_OFFSET" type="uint"/> - <reg32 offset="0x9e07" name="PC_DMA_SIZE" type="uint"/> + <reg64 offset="0x9e04" name="PC_DMA_BASE" type="address" variants="A6XX-A7XX"/> + <reg32 offset="0x9e06" name="PC_DMA_OFFSET" type="uint" variants="A6XX-A7XX"/> + <reg32 offset="0x9e07" name="PC_DMA_SIZE" type="uint" variants="A6XX-A7XX"/> + <reg64 offset="0x9e08" name="PC_TESS_BASE" variants="A6XX" type="waddress" align="32" usage="cmd"/> - <reg64 offset="0x9810" name="PC_TESS_BASE" variants="A7XX-" type="waddress" align="32" usage="cmd"/> + <reg64 offset="0x9810" name="PC_TESS_BASE" variants="A7XX" type="waddress" align="32" usage="cmd"/> - <reg32 offset="0x9e0b" name="PC_DRAWCALL_CNTL" type="vgt_draw_initiator_a4xx"> + <reg32 offset="0x9e0b" name="PC_DRAWCALL_CNTL" type="vgt_draw_initiator_a4xx" variants="A6XX-A7XX"> <doc> Possibly not really "initiating" the draw but the layout is similar to VGT_DRAW_INITIATOR on older gens </doc> </reg32> - <reg32 offset="0x9e0c" name="PC_DRAWCALL_INSTANCE_NUM" type="uint"/> - <reg32 offset="0x9e0d" name="PC_DRAWCALL_SIZE" type="uint"/> + <reg32 offset="0x9e0c" name="PC_DRAWCALL_INSTANCE_NUM" type="uint" variants="A6XX-A7XX"/> + <reg32 offset="0x9e0d" name="PC_DRAWCALL_SIZE" type="uint" variants="A6XX-A7XX"/> <!-- These match the contents of CP_SET_BIN_DATA (not written directly) --> - <reg32 offset="0x9e11" name="PC_VIS_STREAM_CNTL"> + <bitset name="a6xx_pc_vis_stream_cntl" inline="yes"> <bitfield name="UNK0" low="0" high="15"/> <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> <bitfield name="VSC_N" low="22" high="26" type="uint"/> - </reg32> - <reg64 offset="0x9e12" name="PC_PVIS_STREAM_BIN_BASE" type="waddress" align="32"/> - <reg64 offset="0x9e14" name="PC_DVIS_STREAM_BIN_BASE" type="waddress" align="32"/> + </bitset> + + <reg32 offset="0x9e11" name="PC_VIS_STREAM_CNTL" type="a6xx_pc_vis_stream_cntl" variants="A6XX-A7XX"/> + <reg64 offset="0x9e12" name="PC_PVIS_STREAM_BIN_BASE" type="waddress" align="32" variants="A6XX-A7XX"/> + <reg64 offset="0x9e14" name="PC_DVIS_STREAM_BIN_BASE" type="waddress" align="32" variants="A6XX-A7XX"/> - <reg32 offset="0x9e1c" name="PC_DRAWCALL_CNTL_OVERRIDE"> + <bitset name="a6xx_pc_drawcall_cntl_override" inline="yes"> <doc>Written by CP_SET_VISIBILITY_OVERRIDE handler</doc> <bitfield name="OVERRIDE" pos="0" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x9e1c" name="PC_DRAWCALL_CNTL_OVERRIDE" type="a6xx_pc_drawcall_cntl_override" variants="A6XX-A7XX"/> <reg32 offset="0x9e24" name="PC_UNKNOWN_9E24" variants="A7XX-" usage="cmd"/> @@ -2932,7 +3041,7 @@ by a particular renderpass/blit. 
<reg32 offset="0xa9b3" name="SP_CS_PROGRAM_COUNTER_OFFSET" type="uint" usage="cmd"/> <reg64 offset="0xa9b4" name="SP_CS_BASE" type="address" align="32" usage="cmd"/> <reg32 offset="0xa9b6" name="SP_CS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="cmd"/> - <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" align="32" usage="cmd"/> + <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" type="waddress" align="32" usage="cmd"/> <reg32 offset="0xa9b9" name="SP_CS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="cmd"/> <reg32 offset="0xa9ba" name="SP_CS_TSIZE" low="0" high="7" type="uint" usage="cmd"/> <reg32 offset="0xa9bb" name="SP_CS_CONFIG" type="a6xx_sp_xs_config" usage="cmd"/> @@ -3017,7 +3126,7 @@ by a particular renderpass/blit. UAV state for compute shader: --> <reg64 offset="0xa9f2" name="SP_CS_UAV_BASE" type="address" align="16" variants="A6XX"/> - <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX"/> + <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX-"/> <reg32 offset="0xaa00" name="SP_CS_USIZE" low="0" high="6" type="uint"/> <!-- Correlated with avgs/uvgs usage in FS --> @@ -3100,14 +3209,19 @@ by a particular renderpass/blit. instructions VS/HS/DS/GS/FS. See SP_CS_UAV_BASE_* for compute shaders. --> <reg64 offset="0xab1a" name="SP_GFX_UAV_BASE" type="address" align="16" usage="cmd"/> - <reg32 offset="0xab20" name="SP_GFX_USIZE" low="0" high="6" type="uint" usage="cmd"/> + <reg32 offset="0xab20" name="SP_GFX_USIZE" low="0" high="6" type="uint" variants="A6XX-A7XX" usage="cmd"/> - <reg32 offset="0xab22" name="SP_UNKNOWN_AB22" variants="A7XX-" usage="cmd"/> + <reg32 offset="0xab22" name="SP_UNKNOWN_AB22" variants="A7XX" usage="cmd"/> + + <enum name="a6xx_sp_a2d_output_ifmt_type"> + <value name="OUTPUT_IFMT_2D_FLOAT" value="0"/> + <value name="OUTPUT_IFMT_2D_SINT" value="1"/> + <value name="OUTPUT_IFMT_2D_UINT" value="2"/> + </enum> <bitset name="a6xx_sp_a2d_output_info" inline="yes"> - <bitfield name="NORM" pos="0" type="boolean"/> - <bitfield name="SINT" pos="1" type="boolean"/> - <bitfield name="UINT" pos="2" type="boolean"/> + <bitfield name="HALF_PRECISION" pos="0" type="boolean"/> + <bitfield name="IFMT_TYPE" low="1" high="2" type="a6xx_sp_a2d_output_ifmt_type"/> <!-- looks like HW only cares about the base type of this format, which matches the ifmt? --> <bitfield name="COLOR_FORMAT" low="3" high="10" type="a6xx_format"/> @@ -3152,7 +3266,7 @@ by a particular renderpass/blit. <reg32 offset="0xae6b" name="SP_UNKNOWN_AE6B" variants="A7XX-" usage="cmd"/> <reg32 offset="0xae6c" name="SP_HLSQ_DBG_ECO_CNTL" variants="A7XX-" usage="cmd"/> <reg32 offset="0xae6d" name="SP_READ_SEL" variants="A7XX-"> - <bitfield name="LOCATION" low="18" high="19" type="a7xx_state_location"/> + <bitfield name="LOCATION" low="18" high="20" type="a7xx_state_location"/> <bitfield name="PIPE" low="16" high="17" type="a7xx_pipe"/> <bitfield name="STATETYPE" low="8" high="15" type="a7xx_statetype_id"/> <bitfield name="USPTP" low="4" high="7"/> @@ -3188,7 +3302,7 @@ by a particular renderpass/blit. 
<!-- looks to work in the same way as a5xx: --> <reg64 offset="0xb302" name="TPL1_GFX_BORDER_COLOR_BASE" type="address" align="128" usage="cmd"/> - <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/> + <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0xb305" name="TPL1_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> <reg32 offset="0xb306" name="TPL1_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> <reg32 offset="0xb307" name="TPL1_WINDOW_OFFSET" type="a6xx_reg_xy" usage="rp_blit"/> @@ -3228,12 +3342,12 @@ by a particular renderpass/blit. </reg32> <reg32 offset="0xb2c0" name="TPL1_A2D_SRC_TEXTURE_INFO" type="a6xx_a2d_src_texture_info" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX"> + <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX-"> <bitfield name="WIDTH" low="0" high="14" type="uint"/> <bitfield name="HEIGHT" low="15" high="29" type="uint"/> </reg32> <reg64 offset="0xb2c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" align="16" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX"> + <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX-"> <!-- Bits from 3..9 must be zero unless 'TPL1_A2D_BLT_CNTL::TYPE' is A6XX_TEX_IMG_BUFFER, which allows for lower alignment. @@ -3266,13 +3380,13 @@ by a particular renderpass/blit. <reg32 offset="0xb2ce" name="SP_PS_UNKNOWN_B4CE" low="0" high="31" variants="A7XX"/> <reg32 offset="0xb2cf" name="SP_PS_UNKNOWN_B4CF" low="0" high="30" variants="A7XX"/> <reg32 offset="0xb2d0" name="SP_PS_UNKNOWN_B4D0" low="0" high="29" variants="A7XX"/> - <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX"/> + <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-"/> <reg32 offset="0xb2d2" name="TPL1_A2D_BLT_CNTL" variants="A7XX-" usage="rp_blit"> <bitfield name="RAW_COPY" pos="0" type="boolean"/> <bitfield name="START_OFFSET_TEXELS" low="16" high="21"/> <bitfield name="TYPE" low="29" high="31" type="a6xx_tex_type"/> </reg32> - <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX" usage="rp_blit"/> <!-- always 0x100000 or 0x1000000? --> <reg32 offset="0xb600" name="TPL1_DBG_ECO_CNTL" low="0" high="25" usage="cmd"/> @@ -3292,17 +3406,13 @@ by a particular renderpass/blit. </reg32> <reg32 offset="0xb605" name="TPL1_UNKNOWN_B605" low="0" high="7" type="uint" variants="A6XX" usage="cmd"/> <!-- always 0x0 or 0x44 ? 
--> - <reg32 offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE_0" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb609" name="TPL1_BICUBIC_WEIGHTS_TABLE_1" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60a" name="TPL1_BICUBIC_WEIGHTS_TABLE_2" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60b" name="TPL1_BICUBIC_WEIGHTS_TABLE_3" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60c" name="TPL1_BICUBIC_WEIGHTS_TABLE_4" low="0" high="29" variants="A6XX"/> + <array offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE" stride="1" length="5" variants="A6XX"> + <reg32 offset="0" name="REG" low="0" high="29"/> + </array> - <reg32 offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE_0" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb609" name="TPL1_BICUBIC_WEIGHTS_TABLE_1" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60a" name="TPL1_BICUBIC_WEIGHTS_TABLE_2" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60b" name="TPL1_BICUBIC_WEIGHTS_TABLE_3" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60c" name="TPL1_BICUBIC_WEIGHTS_TABLE_4" low="0" high="29" variants="A7XX" usage="cmd"/> + <array offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE" stride="1" length="5" variants="A7XX"> + <reg32 offset="0" name="REG" low="0" high="29" usage="cmd"/> + </array> <array offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="12" variants="A6XX"/> <array offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="18" variants="A7XX"/> @@ -3634,7 +3744,7 @@ by a particular renderpass/blit. <reg32 offset="0xbb10" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/> <reg32 offset="0xab03" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/> - <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX_0" stride="1" length="64" variants="A7XX-"/> + <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX" stride="1" length="64" variants="A7XX"/> <reg32 offset="0xbb11" name="HLSQ_SHARED_CONSTS" variants="A6XX" usage="cmd"> <doc> @@ -3796,6 +3906,14 @@ by a particular renderpass/blit. <reg32 offset="0x0030" name="CFG_DBGBUS_TRACE_BUF2"/> </domain> +<domain name="A7XX_CX_DBGC" width="32" varset="chip"> + <!-- Bitfields shifted, but otherwise the same: --> + <reg32 offset="0x0000" name="CFG_DBGBUS_SEL_A" variants="A7XX-"> + <bitfield high="7" low="0" name="PING_INDEX"/> + <bitfield high="24" low="16" name="PING_BLK_SEL"/> + </reg32> +</domain> + <domain name="A6XX_CX_MISC" width="32" prefix="variant" varset="chip"> <reg32 offset="0x0001" name="SYSTEM_CACHE_CNTL_0"/> <reg32 offset="0x0002" name="SYSTEM_CACHE_CNTL_1"/> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml index 307d43dda8a2..56cfaff614a4 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml @@ -9,38 +9,6 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <domain name="A6XX_TEX_SAMP" width="32"> <doc>Texture sampler dwords</doc> - <enum name="a6xx_tex_filter"> <!-- same as a4xx? --> - <value name="A6XX_TEX_NEAREST" value="0"/> - <value name="A6XX_TEX_LINEAR" value="1"/> - <value name="A6XX_TEX_ANISO" value="2"/> - <value name="A6XX_TEX_CUBIC" value="3"/> <!-- a650 only --> - </enum> - <enum name="a6xx_tex_clamp"> <!-- same as a4xx? 
--> - <value name="A6XX_TEX_REPEAT" value="0"/> - <value name="A6XX_TEX_CLAMP_TO_EDGE" value="1"/> - <value name="A6XX_TEX_MIRROR_REPEAT" value="2"/> - <value name="A6XX_TEX_CLAMP_TO_BORDER" value="3"/> - <value name="A6XX_TEX_MIRROR_CLAMP" value="4"/> - </enum> - <enum name="a6xx_tex_aniso"> <!-- same as a4xx? --> - <value name="A6XX_TEX_ANISO_1" value="0"/> - <value name="A6XX_TEX_ANISO_2" value="1"/> - <value name="A6XX_TEX_ANISO_4" value="2"/> - <value name="A6XX_TEX_ANISO_8" value="3"/> - <value name="A6XX_TEX_ANISO_16" value="4"/> - </enum> - <enum name="a6xx_reduction_mode"> - <value name="A6XX_REDUCTION_MODE_AVERAGE" value="0"/> - <value name="A6XX_REDUCTION_MODE_MIN" value="1"/> - <value name="A6XX_REDUCTION_MODE_MAX" value="2"/> - </enum> - <enum name="a6xx_fast_border_color"> - <!-- R B G A --> - <value name="A6XX_BORDER_COLOR_0_0_0_0" value="0"/> - <value name="A6XX_BORDER_COLOR_0_0_0_1" value="1"/> - <value name="A6XX_BORDER_COLOR_1_1_1_0" value="2"/> - <value name="A6XX_BORDER_COLOR_1_1_1_1" value="3"/> - </enum> <reg32 offset="0" name="0"> <bitfield name="MIPFILTER_LINEAR_NEAR" pos="0" type="boolean"/> @@ -79,14 +47,6 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <domain name="A6XX_TEX_CONST" width="32" varset="chip"> <doc>Texture constant dwords</doc> - <enum name="a6xx_tex_swiz"> <!-- same as a4xx? --> - <value name="A6XX_TEX_X" value="0"/> - <value name="A6XX_TEX_Y" value="1"/> - <value name="A6XX_TEX_Z" value="2"/> - <value name="A6XX_TEX_W" value="3"/> - <value name="A6XX_TEX_ZERO" value="4"/> - <value name="A6XX_TEX_ONE" value="5"/> - </enum> <reg32 offset="0" name="0"> <bitfield name="TILE_MODE" low="0" high="1" type="a6xx_tile_mode"/> <bitfield name="SRGB" pos="2" type="boolean"/> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml index 665539b098c6..4e42f055b85f 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml @@ -320,14 +320,14 @@ to upconvert to 32b float internally? 16b float: 3 --> <enum name="a6xx_2d_ifmt"> - <value value="0x10" name="R2D_UNORM8"/> <value value="0x7" name="R2D_INT32"/> <value value="0x6" name="R2D_INT16"/> <value value="0x5" name="R2D_INT8"/> <value value="0x4" name="R2D_FLOAT32"/> <value value="0x3" name="R2D_FLOAT16"/> + <value value="0x2" name="R2D_SNORM8"/> <value value="0x1" name="R2D_UNORM8_SRGB"/> - <value value="0x0" name="R2D_RAW"/> + <value value="0x0" name="R2D_UNORM8"/> </enum> <enum name="a6xx_tex_type"> @@ -380,4 +380,50 @@ to upconvert to 32b float internally? <value value="0x3" name="TESS_CCW_TRIS"/> </enum> +<enum name="a6xx_tex_filter"> <!-- same as a4xx? --> + <value name="A6XX_TEX_NEAREST" value="0"/> + <value name="A6XX_TEX_LINEAR" value="1"/> + <value name="A6XX_TEX_ANISO" value="2"/> + <value name="A6XX_TEX_CUBIC" value="3"/> <!-- a650 only --> +</enum> + +<enum name="a6xx_tex_clamp"> <!-- same as a4xx? --> + <value name="A6XX_TEX_REPEAT" value="0"/> + <value name="A6XX_TEX_CLAMP_TO_EDGE" value="1"/> + <value name="A6XX_TEX_MIRROR_REPEAT" value="2"/> + <value name="A6XX_TEX_CLAMP_TO_BORDER" value="3"/> + <value name="A6XX_TEX_MIRROR_CLAMP" value="4"/> +</enum> + +<enum name="a6xx_tex_aniso"> <!-- same as a4xx? 
--> + <value name="A6XX_TEX_ANISO_1" value="0"/> + <value name="A6XX_TEX_ANISO_2" value="1"/> + <value name="A6XX_TEX_ANISO_4" value="2"/> + <value name="A6XX_TEX_ANISO_8" value="3"/> + <value name="A6XX_TEX_ANISO_16" value="4"/> +</enum> + +<enum name="a6xx_reduction_mode"> + <value name="A6XX_REDUCTION_MODE_AVERAGE" value="0"/> + <value name="A6XX_REDUCTION_MODE_MIN" value="1"/> + <value name="A6XX_REDUCTION_MODE_MAX" value="2"/> +</enum> + +<enum name="a6xx_fast_border_color"> + <!-- R B G A --> + <value name="A6XX_BORDER_COLOR_0_0_0_0" value="0"/> + <value name="A6XX_BORDER_COLOR_0_0_0_1" value="1"/> + <value name="A6XX_BORDER_COLOR_1_1_1_0" value="2"/> + <value name="A6XX_BORDER_COLOR_1_1_1_1" value="3"/> +</enum> + +<enum name="a6xx_tex_swiz"> <!-- same as a4xx? --> + <value name="A6XX_TEX_X" value="0"/> + <value name="A6XX_TEX_Y" value="1"/> + <value name="A6XX_TEX_Z" value="2"/> + <value name="A6XX_TEX_W" value="3"/> + <value name="A6XX_TEX_ZERO" value="4"/> + <value name="A6XX_TEX_ONE" value="5"/> +</enum> + </database> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml index 3d2cc339b8f1..b15a242d974d 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml @@ -99,6 +99,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <bitfield name="GX_HM_GDSC_POWER_OFF" pos="6" type="boolean"/> <bitfield name="GX_HM_CLK_OFF" pos="7" type="boolean"/> </reg32> + <reg32 offset="0x50d0" name="GMU_SPTPRAC_PWR_CLK_STATUS" variants="A7XX"> + <bitfield name="GX_HM_GDSC_POWER_OFF" pos="0" type="boolean"/> + <bitfield name="GX_HM_CLK_OFF" pos="1" type="boolean"/> + </reg32> <reg32 offset="0x50e4" name="GMU_GPU_NAP_CTRL"> <bitfield name="HW_NAP_ENABLE" pos="0"/> <bitfield name="SID" low="4" high="8"/> @@ -127,6 +131,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x5088" name="GMU_ALWAYS_ON_COUNTER_L"/> <reg32 offset="0x5089" name="GMU_ALWAYS_ON_COUNTER_H"/> <reg32 offset="0x50c3" name="GMU_GMU_PWR_COL_KEEPALIVE"/> + <reg32 offset="0x50c4" name="GMU_PWR_COL_PREEMPT_KEEPALIVE"/> <reg32 offset="0x5180" name="GMU_HFI_CTRL_STATUS"/> <reg32 offset="0x5181" name="GMU_HFI_VERSION_INFO"/> <reg32 offset="0x5182" name="GMU_HFI_SFR_ADDR"/> @@ -228,6 +233,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x03ee" name="RSCC_TCS1_DRV0_STATUS"/> <reg32 offset="0x0496" name="RSCC_TCS2_DRV0_STATUS"/> <reg32 offset="0x053e" name="RSCC_TCS3_DRV0_STATUS"/> + <reg32 offset="0x05e6" name="RSCC_TCS4_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x068e" name="RSCC_TCS5_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x0736" name="RSCC_TCS6_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x07de" name="RSCC_TCS7_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x0886" name="RSCC_TCS8_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x092e" name="RSCC_TCS9_DRV0_STATUS" variants="A7XX"/> </domain> </database> diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 7abc08635495..0e10e1c6d263 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -120,12 +120,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <value name="LRZ_FLUSH" value="38" variants="A5XX-"/> <value name="BLIT_OP_FILL_2D" value="39" 
variants="A5XX-"/> <value name="BLIT_OP_COPY_2D" value="40" variants="A5XX-A6XX"/> - <value name="UNK_40" value="40" variants="A7XX"/> + <value name="LRZ_CACHE_INVALIDATE" value="40" variants="A7XX"/> <value name="LRZ_Q_CACHE_INVALIDATE" value="41" variants="A7XX"/> <value name="BLIT_OP_SCALE_2D" value="42" variants="A5XX-"/> <value name="CONTEXT_DONE_2D" value="43" variants="A5XX-"/> - <value name="UNK_2C" value="44" variants="A5XX-"/> - <value name="UNK_2D" value="45" variants="A5XX-"/> + <value name="VSC_BINNING_START" value="44" variants="A5XX-"/> + <value name="VSC_BINNING_END" value="45" variants="A5XX-"/> <!-- a6xx events --> <doc> @@ -523,7 +523,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <!-- Seems to set the mode flags which control which CP_SET_DRAW_STATE packets are executed, based on their ENABLE_MASK values - + CP_SET_MODE w/ payload of 0x1 seems to cause CP_SET_DRAW_STATE packets w/ ENABLE_MASK & 0x6 to execute immediately --> @@ -640,8 +640,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <value name="CP_BV_BR_COUNT_OPS" value="0x1b" variants="A7XX-"/> <doc> Clears, adds to local, or adds to global timestamp </doc> <value name="CP_MODIFY_TIMESTAMP" value="0x1c" variants="A7XX-"/> - <!-- similar to CP_CONTEXT_REG_BUNCH, but discards first two dwords?? --> - <value name="CP_CONTEXT_REG_BUNCH2" value="0x5d" variants="A7XX-"/> + <value name="CP_NON_CONTEXT_REG_BUNCH" value="0x5d" variants="A7XX-"/> <doc> Write to a scratch memory that is read by CP_REG_TEST with SOURCE_SCRATCH_MEM set. It's not the same scratch as scratch registers. @@ -918,12 +917,6 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> <stripe varset="chip" variants="A5XX-"> - <reg32 offset="4" name="4"> - <bitfield name="INDX_BASE_LO" low="0" high="31"/> - </reg32> - <reg32 offset="5" name="5"> - <bitfield name="INDX_BASE_HI" low="0" high="31"/> - </reg32> <reg64 offset="4" name="INDX_BASE" type="address"/> <reg32 offset="6" name="6"> <!-- max # of elements in index buffer --> @@ -1099,8 +1092,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/> <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/> <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/> - <bitfield name="GROUP_ID" low="24" high="28" type="uint"/> + <!-- high bit is 28 until a750: --> + <bitfield name="GROUP_ID" low="24" high="29" type="uint"/> </reg32> + <reg64 offset="1" name="ADDR" type="address"/> <reg32 offset="1" name="1"> <bitfield name="ADDR_LO" low="0" high="31" type="hex"/> </reg32> @@ -1166,26 +1161,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> <stripe varset="a7xx_abs_mask_mode" variants="NO_ABS_MASK"> <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> - <reg32 offset="1" name="1"> - <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="BIN_DATA_ADDR" type="address"/> <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> - <reg32 offset="3" name="3"> - <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> - </reg32> - <reg32 offset="4" name="4"> - <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> - </reg32> + <reg64 offset="3" name="BIN_SIZE_ADDR" type="address"/> <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> - <reg32 offset="5" name="5"> - <bitfield 
name="BIN_PRIM_STRM_LO" low="0" high="31"/> - </reg32> - <reg32 offset="6" name="6"> - <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> - </reg32> + <reg64 offset="5" name="BIN_PRIM_STRM" type="address"/> <!-- a7xx adds a few more addresses to the end of the pkt --> @@ -1195,26 +1175,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <stripe varset="a7xx_abs_mask_mode" variants="ABS_MASK"> <reg32 offset="1" name="ABS_MASK"/> <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> - <reg32 offset="2" name="2"> - <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="3" name="3"> - <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="2" name="BIN_DATA_ADDR" type="address"/> <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> - <reg32 offset="4" name="4"> - <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> - </reg32> - <reg32 offset="5" name="5"> - <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> - </reg32> + <reg64 offset="4" name="BIN_SIZE_ADDR" type="address"/> <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> - <reg32 offset="6" name="6"> - <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/> - </reg32> - <reg32 offset="7" name="7"> - <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> - </reg32> + <reg64 offset="6" name="BIN_PRIM_STRM" type="address"/> <!-- a7xx adds a few more addresses to the end of the pkt --> @@ -1300,7 +1265,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </domain> -<domain name="CP_REG_TO_MEM" width="32"> +<domain name="CP_REG_TO_MEM" width="32" prefix="chip"> <reg32 offset="0" name="0"> <bitfield name="REG" low="0" high="17" type="hex"/> <!-- number of registers/dwords copied is max(CNT, 1). --> @@ -1308,12 +1273,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="1" name="DEST" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="1" name="DEST" type="address"/> + </stripe> </domain> <domain name="CP_REG_TO_MEM_OFFSET_REG" width="32"> @@ -1329,12 +1294,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="DEST" type="waddress"/> <reg32 offset="3" name="3"> <bitfield name="OFFSET0" low="0" high="17" type="hex"/> <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/> @@ -1354,18 +1314,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> - <reg32 offset="3" name="3"> - <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="4" name="4"> - <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/> - </reg32> + 
<reg64 offset="1" name="DEST" type="waddress"/> + <reg64 offset="3" name="OFFSET" type="waddress"/> </domain> <domain name="CP_MEM_TO_REG" width="32"> @@ -1378,12 +1328,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- does the same thing as CP_MEM_TO_MEM::UNK31 --> <bitfield name="UNK31" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="SRC" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="SRC_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="1" name="SRC" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="1" name="SRC" type="address"/> + </stripe> </domain> <domain name="CP_MEM_TO_MEM" width="32"> @@ -1403,6 +1353,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- some other kind of wait --> <bitfield name="UNK31" pos="31" type="boolean"/> </reg32> + <reg64 offset="1" name="DST" type="waddress"/> + <reg64 offset="3" name="SRC_A" type="address"/> + <reg64 offset="5" name="SRC_B" type="address"/> + <reg64 offset="7" name="SRC_C" type="address"/> <!-- followed by sequence of addresses.. the first is the destination and the rest are N src addresses which are @@ -1461,12 +1415,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </domain> <domain name="CP_MEM_WRITE" width="32"> - <reg32 offset="0" name="0"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="0" name="ADDR" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="0" name="ADDR" type="address"/> + </stripe> <!-- followed by the DWORDs to write --> </domain> @@ -1518,24 +1472,14 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> <reg32 offset="4" name="4"> <bitfield name="MASK" low="0" high="31"/> </reg32> - <reg32 offset="5" name="5"> - <bitfield name="WRITE_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="6" name="6"> - <bitfield name="WRITE_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="5" name="WRITE_ADDR" type="waddress"/> <reg32 offset="7" name="7"> <bitfield name="WRITE_DATA" low="0" high="31"/> </reg32> @@ -1550,12 +1494,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- Reserved for flags, presumably? 
Unused in FW --> <bitfield name="RESERVED" low="0" high="31" type="hex"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> @@ -1573,12 +1512,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> @@ -1712,12 +1646,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) TODO what is gpuaddr for, seems to be all 0's.. maybe needed for context switch? --> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_0_LO" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="ADDR_0_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="ADDR" type="waddress"/> <reg32 offset="3" name="3"> <!-- ??? --> </reg32> @@ -1832,9 +1761,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <reg32 offset="0" name="0"> </reg32> <stripe varset="chip" variants="A4XX"> - <reg32 offset="1" name="1"> - <bitfield name="ADDR" low="0" high="31"/> - </reg32> + <reg32 offset="1" name="ADDR" type="address"/> <reg32 offset="2" name="2"> <!-- localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> @@ -1843,12 +1770,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </stripe> <stripe varset="chip" variants="A5XX-"> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="ADDR" type="address"/> <reg32 offset="3" name="3"> <!-- localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> @@ -2161,12 +2083,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </doc> </value> </enum> - <reg32 offset="0" name="0"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <reg64 offset="0" name="ADDR" type="address"/> <reg32 offset="2" name="2"> <bitfield name="DWORDS" low="0" high="19" type="uint"/> <bitfield name="TYPE" low="20" high="21" type="amble_type"/> diff --git a/drivers/gpu/drm/msm/registers/display/dsi.xml b/drivers/gpu/drm/msm/registers/display/dsi.xml index 501ffc585a9f..c7a7b633d747 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi.xml @@ -159,28 +159,28 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <bitfield name="RGB_SWAP" low="12" high="14" type="dsi_rgb_swap"/> </reg32> <reg32 offset="0x00020" name="ACTIVE_H"> - <bitfield name="START" low="0" high="11" type="uint"/> - <bitfield name="END" low="16" high="27" type="uint"/> + <bitfield name="START" low="0" high="15" type="uint"/> + <bitfield name="END" low="16" high="31" type="uint"/> </reg32> <reg32 offset="0x00024" name="ACTIVE_V"> - <bitfield name="START" low="0" high="11" type="uint"/> - <bitfield name="END" low="16" high="27" 
type="uint"/> + <bitfield name="START" low="0" high="15" type="uint"/> + <bitfield name="END" low="16" high="31" type="uint"/> </reg32> <reg32 offset="0x00028" name="TOTAL"> - <bitfield name="H_TOTAL" low="0" high="11" type="uint"/> - <bitfield name="V_TOTAL" low="16" high="27" type="uint"/> + <bitfield name="H_TOTAL" low="0" high="15" type="uint"/> + <bitfield name="V_TOTAL" low="16" high="31" type="uint"/> </reg32> <reg32 offset="0x0002c" name="ACTIVE_HSYNC"> - <bitfield name="START" low="0" high="11" type="uint"/> - <bitfield name="END" low="16" high="27" type="uint"/> + <bitfield name="START" low="0" high="15" type="uint"/> + <bitfield name="END" low="16" high="31" type="uint"/> </reg32> <reg32 offset="0x00030" name="ACTIVE_VSYNC_HPOS"> - <bitfield name="START" low="0" high="11" type="uint"/> - <bitfield name="END" low="16" high="27" type="uint"/> + <bitfield name="START" low="0" high="15" type="uint"/> + <bitfield name="END" low="16" high="31" type="uint"/> </reg32> <reg32 offset="0x00034" name="ACTIVE_VSYNC_VPOS"> - <bitfield name="START" low="0" high="11" type="uint"/> - <bitfield name="END" low="16" high="27" type="uint"/> + <bitfield name="START" low="0" high="15" type="uint"/> + <bitfield name="END" low="16" high="31" type="uint"/> </reg32> <reg32 offset="0x00038" name="CMD_DMA_CTRL"> @@ -209,8 +209,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <bitfield name="WORD_COUNT" low="16" high="31" type="uint"/> </reg32> <reg32 offset="0x00058" name="CMD_MDP_STREAM0_TOTAL"> - <bitfield name="H_TOTAL" low="0" high="11" type="uint"/> - <bitfield name="V_TOTAL" low="16" high="27" type="uint"/> + <bitfield name="H_TOTAL" low="0" high="15" type="uint"/> + <bitfield name="V_TOTAL" low="16" high="31" type="uint"/> </reg32> <reg32 offset="0x0005c" name="CMD_MDP_STREAM1_CTRL"> <bitfield name="DATA_TYPE" low="0" high="5" type="uint"/> diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml index 4e5ac0f25dea..f41516dd0567 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml @@ -22,7 +22,16 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x00018" name="GLBL_CTRL"/> <reg32 offset="0x0001c" name="RBUF_CTRL"/> <reg32 offset="0x00020" name="VREG_CTRL_0"/> - <reg32 offset="0x00024" name="CTRL_0"/> + <reg32 offset="0x00024" name="CTRL_0"> + <bitfield name="CLKSL_SHUTDOWNB" pos="7" type="boolean"/> + <bitfield name="DIGTOP_PWRDN_B" pos="6" type="boolean"/> + <bitfield name="PLL_SHUTDOWNB" pos="5" type="boolean"/> + <bitfield name="DLN3_SHUTDOWNB" pos="4" type="boolean"/> + <bitfield name="DLN2_SHUTDOWNB" pos="3" type="boolean"/> + <bitfield name="CLK_SHUTDOWNB" pos="2" type="boolean"/> + <bitfield name="DLN1_SHUTDOWNB" pos="1" type="boolean"/> + <bitfield name="DLN0_SHUTDOWNB" pos="0" type="boolean"/> + </reg32> <reg32 offset="0x00028" name="CTRL_1"/> <reg32 offset="0x0002c" name="CTRL_2"/> <reg32 offset="0x00030" name="CTRL_3"/> diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py index a409404627c7..1d603dadfabd 100644 --- a/drivers/gpu/drm/msm/registers/gen_header.py +++ b/drivers/gpu/drm/msm/registers/gen_header.py @@ -11,7 +11,6 @@ import collections import argparse import time import datetime -import re class Error(Exception): def __init__(self, message): @@ -31,7 +30,7 @@ class Enum(object): def names(self): return [n for (n, 
value) in self.values] - def dump(self): + def dump(self, is_deprecated): use_hex = False for (name, value) in self.values: if value > 0x1000: @@ -45,7 +44,7 @@ class Enum(object): print("\t%s = %d," % (name, value)) print("};\n") - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): pass class Field(object): @@ -70,11 +69,11 @@ class Field(object): raise parser.error("booleans should be 1 bit fields") elif self.type == "float" and not (high - low == 31 or high - low == 15): raise parser.error("floats should be 16 or 32 bit fields") - elif not self.type in builtin_types and not self.type in parser.enums: + elif self.type not in builtin_types and self.type not in parser.enums: raise parser.error("unknown type '%s'" % self.type) def ctype(self, var_name): - if self.type == None: + if self.type is None: type = "uint32_t" val = var_name elif self.type == "boolean": @@ -124,7 +123,7 @@ def field_name(reg, f): name = f.name.lower() else: # We hit this path when a reg is defined with no bitset fields, ie. - # <reg32 offset="0x88db" name="RB_BLIT_DST_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/> + # <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/> name = reg.name.lower() if (name in [ "double", "float", "int" ]) or not (name[0].isalpha()): @@ -146,10 +145,23 @@ def indices_strides(indices): "%s(i%d)" % (offset, idx) for (idx, (ctype, stride, offset)) in enumerate(indices)]) +def is_number(str): + try: + int(str) + return True + except ValueError: + return False + +def sanitize_variant(variant): + if variant and "-" in variant: + return variant[:variant.index("-")] + return variant + class Bitset(object): def __init__(self, name, template): self.name = name self.inline = False + self.reg = None if template: self.fields = template.fields[:] else: @@ -175,11 +187,7 @@ class Bitset(object): print("#endif\n") print(" return (struct fd_reg_pair) {") - if reg.array: - print(" .reg = REG_%s(__i)," % reg.full_name) - else: - print(" .reg = REG_%s," % reg.full_name) - + print(" .reg = (uint32_t)%s," % reg.reg_offset()) print(" .value =") for f in self.fields: if f.type in [ "address", "waddress" ]: @@ -204,7 +212,7 @@ class Bitset(object): print(" };") - def dump_pack_struct(self, reg=None): + def dump_pack_struct(self, is_deprecated, reg=None): if not reg: return @@ -229,12 +237,15 @@ class Bitset(object): tab_to(" uint32_t", "dword;") print("};\n") + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED" if reg.array: - print("static inline struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" % - (prefix, prefix)) + print("static inline%s struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" % + (depcrstr, prefix, prefix)) else: - print("static inline struct fd_reg_pair\npack_%s(struct %s fields)\n{" % - (prefix, prefix)) + print("static inline%s struct fd_reg_pair\npack_%s(struct %s fields)\n{" % + (depcrstr, prefix, prefix)) self.dump_regpair_builder(reg) @@ -253,18 +264,23 @@ class Bitset(object): (prefix, prefix, prefix, skip)) - def dump(self, prefix=None): - if prefix == None: + def dump(self, is_deprecated, prefix=None): + if prefix is None: prefix = self.name + if self.reg and self.reg.bit_size == 64: + print("static inline uint32_t %s_LO(uint32_t val)\n{" % prefix) + print("\treturn val;\n}") + print("static inline uint32_t %s_HI(uint32_t val)\n{" % prefix) + print("\treturn val;\n}") for f in self.fields: if f.name: name = prefix + "_" + f.name else: name = prefix - if not f.name 
and f.low == 0 and f.shr == 0 and not f.type in ["float", "fixed", "ufixed"]: + if not f.name and f.low == 0 and f.shr == 0 and f.type not in ["float", "fixed", "ufixed"]: pass - elif f.type == "boolean" or (f.type == None and f.low == f.high): + elif f.type == "boolean" or (f.type is None and f.low == f.high): tab_to("#define %s" % name, "0x%08x" % (1 << f.low)) else: tab_to("#define %s__MASK" % name, "0x%08x" % mask(f.low, f.high)) @@ -286,6 +302,7 @@ class Array(object): self.domain = domain self.variant = variant self.parent = parent + self.children = [] if self.parent: self.name = self.parent.name + "_" + self.local_name else: @@ -337,12 +354,15 @@ class Array(object): offset += self.parent.total_offset() return offset - def dump(self): + def dump(self, is_deprecated): + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED" proto = indices_varlist(self.indices()) strides = indices_strides(self.indices()) array_offset = self.total_offset() if self.fixed_offsets: - print("static inline uint32_t __offset_%s(%s idx)" % (self.local_name, self.index_ctype())) + print("static inline%s uint32_t __offset_%s(%s idx)" % (depcrstr, self.local_name, self.index_ctype())) print("{\n\tswitch (idx) {") if self.index_type: for val, offset in zip(self.index_type.names(), self.offsets): @@ -357,7 +377,7 @@ class Array(object): else: tab_to("#define REG_%s_%s(%s)" % (self.domain, self.name, proto), "(0x%08x + %s )\n" % (array_offset, strides)) - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): pass def dump_regpair_builder(self): @@ -373,6 +393,7 @@ class Reg(object): self.bit_size = bit_size if array: self.name = array.name + "_" + self.name + array.children.append(self) self.full_name = self.domain + "_" + self.name if "stride" in attrs: self.stride = int(attrs["stride"], 0) @@ -397,25 +418,34 @@ class Reg(object): else: return self.offset - def dump(self): + def reg_offset(self): + if self.array: + offset = self.array.offset + self.offset + return "(0x%08x + 0x%x*__i)" % (offset, self.array.stride) + return "0x%08x" % self.offset + + def dump(self, is_deprecated): + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED " proto = indices_prototype(self.indices()) strides = indices_strides(self.indices()) offset = self.total_offset() if proto == '': tab_to("#define REG_%s" % self.full_name, "0x%08x" % offset) else: - print("static inline uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (self.full_name, proto, offset, strides)) + print("static inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (depcrstr, self.full_name, proto, offset, strides)) if self.bitset.inline: - self.bitset.dump(self.full_name) + self.bitset.dump(is_deprecated, self.full_name) + print("") - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): if self.bitset.inline: - self.bitset.dump_pack_struct(self) + self.bitset.dump_pack_struct(is_deprecated, self) def dump_regpair_builder(self): - if self.bitset.inline: - self.bitset.dump_regpair_builder(self) + self.bitset.dump_regpair_builder(self) def dump_py(self): print("\tREG_%s = 0x%08x" % (self.full_name, self.offset)) @@ -444,9 +474,6 @@ class Parser(object): self.variants = set() self.file = [] self.xml_files = [] - self.copyright_year = None - self.authors = [] - self.license = None def error(self, message): parser, filename = self.stack[-1] @@ -454,7 +481,7 @@ class Parser(object): def prefix(self, variant=None): if self.current_prefix_type == "variant" and variant: - return variant + return sanitize_variant(variant) 
elif self.current_stripe: return self.current_stripe + "_" + self.current_domain elif self.current_prefix: @@ -500,15 +527,22 @@ class Parser(object): return varset def parse_variants(self, attrs): - if not "variants" in attrs: + if "variants" not in attrs: return None - variant = attrs["variants"].split(",")[0] - if "-" in variant: - variant = variant[:variant.index("-")] + variant = attrs["variants"].split(",")[0] varset = self.parse_varset(attrs) - assert varset.has_name(variant) + if "-" in variant: + # if we have a range, validate that both the start and end + # of the range are valid enums: + start = variant[:variant.index("-")] + end = variant[variant.index("-") + 1:] + assert varset.has_name(start) + if end != "": + assert varset.has_name(end) + else: + assert varset.has_name(variant) return variant @@ -572,9 +606,6 @@ class Parser(object): error_str = str(xmlschema.error_log.filter_from_errors()[0]) raise self.error("Schema validation failed for: " + filename + "\n" + error_str) except ImportError as e: - if self.validate: - raise e - print("lxml not found, skipping validation", file=sys.stderr) def do_parse(self, filename): @@ -620,6 +651,7 @@ class Parser(object): self.current_reg = Reg(attrs, self.prefix(variant), self.current_array, bit_size) self.current_reg.bitset = self.current_bitset + self.current_bitset.reg = self.current_reg if len(self.stack) == 1: self.file.append(self.current_reg) @@ -643,7 +675,7 @@ class Parser(object): elif name == "domain": self.current_domain = attrs["name"] if "prefix" in attrs: - self.current_prefix = self.parse_variants(attrs) + self.current_prefix = sanitize_variant(self.parse_variants(attrs)) self.current_prefix_type = attrs["prefix"] else: self.current_prefix = None @@ -651,7 +683,7 @@ class Parser(object): if "varset" in attrs: self.current_varset = self.enums[attrs["varset"]] elif name == "stripe": - self.current_stripe = self.parse_variants(attrs) + self.current_stripe = sanitize_variant(self.parse_variants(attrs)) elif name == "enum": self.current_enum_value = 0 self.current_enum = Enum(attrs["name"]) @@ -686,10 +718,6 @@ class Parser(object): self.parse_field(attrs["name"], attrs) elif name == "database": self.do_validate(attrs["xsi:schemaLocation"]) - elif name == "copyright": - self.copyright_year = attrs["year"] - elif name == "author": - self.authors.append(attrs["name"] + " <" + attrs["email"] + "> " + attrs["name"]) def end_element(self, name): if name == "domain": @@ -703,11 +731,16 @@ class Parser(object): elif name == "reg32": self.current_reg = None elif name == "array": + # if the array has no Reg children, push an implicit reg32: + if len(self.current_array.children) == 0: + attrs = { + "name": "REG", + "offset": "0", + } + self.parse_reg(attrs, 32) self.current_array = self.current_array.parent elif name == "enum": self.current_enum = None - elif name == "license": - self.license = self.cdata def character_data(self, data): self.cdata += data @@ -720,10 +753,10 @@ class Parser(object): if variants: for variant, vreg in variants.items(): if reg == vreg: - d[(usage, variant)].append(reg) + d[(usage, sanitize_variant(variant))].append(reg) else: for variant in self.variants: - d[(usage, variant)].append(reg) + d[(usage, sanitize_variant(variant))].append(reg) print("#ifdef __cplusplus") @@ -753,6 +786,9 @@ class Parser(object): print("#endif") + def has_variants(self, reg): + return reg.name in self.variant_regs and not is_number(reg.name) and not is_number(reg.name[1:]) + def dump(self): enums = [] bitsets = [] @@ -766,7 
+802,7 @@ class Parser(object): regs.append(e) for e in enums + bitsets + regs: - e.dump() + e.dump(self.has_variants(e)) self.dump_reg_usages() @@ -782,8 +818,7 @@ class Parser(object): def dump_reg_variants(self, regname, variants): - # Don't bother for things that only have a single variant: - if len(variants) == 1: + if is_number(regname) or is_number(regname[1:]): return print("#ifdef __cplusplus") print("struct __%s {" % regname) @@ -834,11 +869,20 @@ class Parser(object): xtravar = "__i, " print("__%s(%sstruct __%s fields) {" % (regname, xtra, regname)) for variant in variants.keys(): - print(" if (%s == %s) {" % (varenum.upper(), variant)) + if "-" in variant: + start = variant[:variant.index("-")] + end = variant[variant.index("-") + 1:] + if end != "": + print(" if ((%s >= %s) && (%s <= %s)) {" % (varenum.upper(), start, varenum.upper(), end)) + else: + print(" if (%s >= %s) {" % (varenum.upper(), start)) + else: + print(" if (%s == %s) {" % (varenum.upper(), variant)) reg = variants[variant] reg.dump_regpair_builder() print(" } else") print(" assert(!\"invalid variant\");") + print(" return (struct fd_reg_pair){};") print("}") if bit_size == 64: @@ -851,7 +895,7 @@ class Parser(object): def dump_structs(self): for e in self.file: - e.dump_pack_struct() + e.dump_pack_struct(self.has_variants(e)) for regname in self.variant_regs: self.dump_reg_variants(regname, self.variant_regs[regname]) @@ -868,33 +912,7 @@ def dump_c(args, guard, func): print("#ifndef %s\n#define %s\n" % (guard, guard)) - print("""/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng gen_header.py tool in this git repository: -http://gitlab.freedesktop.org/mesa/mesa/ -git clone https://gitlab.freedesktop.org/mesa/mesa.git - -The rules-ng-ng source files this header was generated from are: -""") - maxlen = 0 - for filepath in p.xml_files: - new_filepath = re.sub("^.+drivers","drivers",filepath) - maxlen = max(maxlen, len(new_filepath)) - for filepath in p.xml_files: - pad = " " * (maxlen - len(new_filepath)) - filesize = str(os.path.getsize(filepath)) - filesize = " " * (7 - len(filesize)) + filesize - filetime = time.ctime(os.path.getmtime(filepath)) - print("- " + new_filepath + pad + " (" + filesize + " bytes, from <stripped>)") - if p.copyright_year: - current_year = str(datetime.date.today().year) - print() - print("Copyright (C) %s-%s by the following authors:" % (p.copyright_year, current_year)) - for author in p.authors: - print("- " + author) - if p.license: - print(p.license) - print("*/") + print("/* Autogenerated file, DO NOT EDIT manually! 
*/") print() print("#ifdef __KERNEL__") @@ -912,9 +930,20 @@ The rules-ng-ng source files this header was generated from are: print("#endif") print() + print("#ifndef FD_NO_DEPRECATED_PACK") + print("#define FD_DEPRECATED __attribute__((deprecated))") + print("#else") + print("#define FD_DEPRECATED") + print("#endif") + print() + func(p) - print("\n#endif /* %s */" % guard) + print() + print("#undef FD_DEPRECATED") + print() + + print("#endif /* %s */" % guard) def dump_c_defines(args): @@ -931,7 +960,7 @@ def dump_py_defines(args): p = Parser() try: - p.parse(args.rnn, args.xml) + p.parse(args.rnn, args.xml, args.validate) except Error as e: print(e, file=sys.stderr) exit(1) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 11d5b923d6e7..e2c55f4b9c5a 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -795,6 +795,10 @@ static bool nv50_plane_format_mod_supported(struct drm_plane *plane, struct nouveau_drm *drm = nouveau_drm(plane->dev); uint8_t i; + /* All chipsets can display all formats in linear layout */ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return true; + if (drm->client.device.info.chipset < 0xc0) { const struct drm_format_info *info = drm_format_info(format); const uint8_t kind = (modifier >> 12) & 0xff; diff --git a/drivers/gpu/drm/nouveau/gv100_fence.c b/drivers/gpu/drm/nouveau/gv100_fence.c index cccdeca72002..317e516c4ec7 100644 --- a/drivers/gpu/drm/nouveau/gv100_fence.c +++ b/drivers/gpu/drm/nouveau/gv100_fence.c @@ -18,7 +18,7 @@ gv100_fence_emit32(struct nouveau_channel *chan, u64 virtual, u32 sequence) struct nvif_push *push = &chan->chan.push; int ret; - ret = PUSH_WAIT(push, 8); + ret = PUSH_WAIT(push, 13); if (ret) return ret; @@ -32,6 +32,11 @@ gv100_fence_emit32(struct nouveau_channel *chan, u64 virtual, u32 sequence) NVDEF(NVC36F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) | NVDEF(NVC36F, SEM_EXECUTE, RELEASE_TIMESTAMP, DIS)); + PUSH_MTHD(push, NVC36F, MEM_OP_A, 0, + MEM_OP_B, 0, + MEM_OP_C, NVDEF(NVC36F, MEM_OP_C, MEMBAR_TYPE, SYS_MEMBAR), + MEM_OP_D, NVDEF(NVC36F, MEM_OP_D, OPERATION, MEMBAR)); + PUSH_MTHD(push, NVC36F, NON_STALL_INTERRUPT, 0); PUSH_KICK(push); diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h b/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h index 8735dda4c8a7..338f74b9f501 100644 --- a/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h +++ b/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h @@ -7,6 +7,91 @@ #define NVC36F_NON_STALL_INTERRUPT (0x00000020) #define NVC36F_NON_STALL_INTERRUPT_HANDLE 31:0 +// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for +// specifying the page address for a targeted TLB invalidate and the uTLB for +// a targeted REPLAY_CANCEL for UVM. +// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly +// rearranged fields. 
+#define NVC36F_MEM_OP_A (0x00000028) +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12 +#define NVC36F_MEM_OP_B (0x0000002c) +#define NVC36F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0 +#define NVC36F_MEM_OP_C (0x00000030) +#define NVC36F_MEM_OP_C_MEMBAR_TYPE 2:0 +#define NVC36F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000 +#define NVC36F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005 +#define 
NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0 +// MEM_OP_D MUST be preceded by MEM_OPs A-C. +#define NVC36F_MEM_OP_D (0x00000034) +#define NVC36F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_D_OPERATION 31:27 +#define NVC36F_MEM_OP_D_OPERATION_MEMBAR 0x00000005 +#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009 +#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a +#define NVC36F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d +#define NVC36F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e +// CLEAN_LINES is an alias for Tegra/GPU IP usage +#define NVC36F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e +#define NVC36F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f +#define NVC36F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010 +#define NVC36F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015 +#define NVC36F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3 #define NVC36F_SEM_ADDR_LO (0x0000005c) #define NVC36F_SEM_ADDR_LO_OFFSET 31:2 #define NVC36F_SEM_ADDR_HI (0x00000060) diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index edbbda78bac9..c4949e815eb3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -60,14 +60,14 @@ * virtual address in the GPU's VA space there is no guarantee that the actual * mappings are created in the GPU's MMU. If the given memory is swapped out * at the time the bind operation is executed the kernel will stash the mapping - * details into it's internal alloctor and create the actual MMU mappings once + * details into it's internal allocator and create the actual MMU mappings once * the memory is swapped back in. While this is transparent for userspace, it is * guaranteed that all the backing memory is swapped back in and all the memory * mappings, as requested by userspace previously, are actually mapped once the * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job. * * A VM_BIND job can be executed either synchronously or asynchronously. If - * exectued asynchronously, userspace may provide a list of syncobjs this job + * executed asynchronously, userspace may provide a list of syncobjs this job * will wait for and/or a list of syncobj the kernel will signal once the * VM_BIND job finished execution. 
If executed synchronously the ioctl will * block until the bind job is finished. For synchronous jobs the kernel will @@ -82,7 +82,7 @@ * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have * an up to date view of the VA space. However, the actual mappings might still * be pending. Hence, EXEC jobs require to have the particular fences - of - * the corresponding VM_BIND jobs they depent on - attached to them. + * the corresponding VM_BIND jobs they depend on - attached to them. */ static int diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 9f345a008717..869d4335c0f4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -240,21 +240,6 @@ nouveau_fence_emit(struct nouveau_fence *fence) return ret; } -void -nouveau_fence_cancel(struct nouveau_fence *fence) -{ - struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - unsigned long flags; - - spin_lock_irqsave(&fctx->lock, flags); - if (!dma_fence_is_signaled_locked(&fence->base)) { - dma_fence_set_error(&fence->base, -ECANCELED); - if (nouveau_fence_signal(fence)) - nvif_event_block(&fctx->event); - } - spin_unlock_irqrestore(&fctx->lock, flags); -} - bool nouveau_fence_done(struct nouveau_fence *fence) { diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 9957a919bd38..183dd43ecfff 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -29,7 +29,6 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); -void nouveau_fence_cancel(struct nouveau_fence *fence); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c index a5ce8eb4a3be..8d5853deeee4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_platform.c +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -30,10 +30,7 @@ static int nouveau_platform_probe(struct platform_device *pdev) func = of_device_get_match_data(&pdev->dev); drm = nouveau_platform_device_create(func, pdev, &device); - if (IS_ERR(drm)) - return PTR_ERR(drm); - - return 0; + return PTR_ERR_OR_ZERO(drm); } static void nouveau_platform_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index cd95446d6851..caab60fc62f6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -108,9 +108,21 @@ struct dma_buf *nouveau_gem_prime_export(struct drm_gem_object *gobj, int flags) { struct nouveau_bo *nvbo = nouveau_gem_object(gobj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = true, + /* We opt to avoid OOM on system pages allocations */ + .gfp_retry_mayfail = true, + .allow_res_evict = false, + }; + int ret; if (nvbo->no_share) return ERR_PTR(-EPERM); + ret = ttm_bo_setup_export(&nvbo->bo, &ctx); + if (ret) + return ERR_PTR(ret); + return drm_gem_prime_export(gobj, flags); } diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 0cc0bc9f9952..e60f7892f5ce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -11,7 +11,6 @@ #include "nouveau_exec.h" #include 
"nouveau_abi16.h" #include "nouveau_sched.h" -#include "nouveau_chan.h" #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 @@ -122,9 +121,11 @@ nouveau_job_done(struct nouveau_job *job) { struct nouveau_sched *sched = job->sched; - spin_lock(&sched->job_list.lock); + spin_lock(&sched->job.list.lock); list_del(&job->entry); - spin_unlock(&sched->job_list.lock); + spin_unlock(&sched->job.list.lock); + + wake_up(&sched->job.wq); } void @@ -305,9 +306,9 @@ nouveau_job_submit(struct nouveau_job *job) } /* Submit was successful; add the job to the schedulers job list. */ - spin_lock(&sched->job_list.lock); - list_add(&job->entry, &sched->job_list.head); - spin_unlock(&sched->job_list.lock); + spin_lock(&sched->job.list.lock); + list_add(&job->entry, &sched->job.list.head); + spin_unlock(&sched->job.list.lock); drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); @@ -392,23 +393,10 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job) nouveau_job_fini(job); } -static void -nouveau_sched_cancel_job(struct drm_sched_job *sched_job) -{ - struct nouveau_fence *fence; - struct nouveau_job *job; - - job = to_nouveau_job(sched_job); - fence = to_nouveau_fence(job->done_fence); - - nouveau_fence_cancel(fence); -} - static const struct drm_sched_backend_ops nouveau_sched_ops = { .run_job = nouveau_sched_run_job, .timedout_job = nouveau_sched_timedout_job, .free_job = nouveau_sched_free_job, - .cancel_job = nouveau_sched_cancel_job, }; static int @@ -458,8 +446,9 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, goto fail_sched; mutex_init(&sched->mutex); - spin_lock_init(&sched->job_list.lock); - INIT_LIST_HEAD(&sched->job_list.head); + spin_lock_init(&sched->job.list.lock); + INIT_LIST_HEAD(&sched->job.list.head); + init_waitqueue_head(&sched->job.wq); return 0; @@ -493,12 +482,16 @@ nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, return 0; } + static void nouveau_sched_fini(struct nouveau_sched *sched) { struct drm_gpu_scheduler *drm_sched = &sched->base; struct drm_sched_entity *entity = &sched->entity; + rmb(); /* for list_empty to work without lock */ + wait_event(sched->job.wq, list_empty(&sched->job.list.head)); + drm_sched_entity_fini(entity); drm_sched_fini(drm_sched); diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index b98c3f0bef30..20cd1da8db73 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -103,9 +103,12 @@ struct nouveau_sched { struct mutex mutex; struct { - struct list_head head; - spinlock_t lock; - } job_list; + struct { + struct list_head head; + spinlock_t lock; + } list; + struct wait_queue_head wq; + } job; }; int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index d94a85509176..79eefdfd08a2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1019,8 +1019,8 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) u64 end = addr + range; again: - spin_lock(&sched->job_list.lock); - list_for_each_entry(__job, &sched->job_list.head, entry) { + spin_lock(&sched->job.list.lock); + list_for_each_entry(__job, &sched->job.list.head, entry) { struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job); list_for_each_op(op, &bind_job->ops) { @@ -1030,7 +1030,7 @@ again: if (!(end <= op_addr || addr >= op_end)) 
{ nouveau_uvmm_bind_job_get(bind_job); - spin_unlock(&sched->job_list.lock); + spin_unlock(&sched->job.list.lock); wait_for_completion(&bind_job->complete); nouveau_uvmm_bind_job_put(bind_job); goto again; @@ -1038,7 +1038,7 @@ again: } } } - spin_unlock(&sched->job_list.lock); + spin_unlock(&sched->job.list.lock); } static int diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c index 99296f03371a..07c1ebc2a941 100644 --- a/drivers/gpu/drm/nouveau/nvif/vmm.c +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -219,7 +219,8 @@ nvif_vmm_ctor(struct nvif_mmu *mmu, const char *name, s32 oclass, case RAW: args->type = NVIF_VMM_V0_TYPE_RAW; break; default: WARN_ON(1); - return -EINVAL; + ret = -EINVAL; + goto done; } memcpy(args->data, argv, argc); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c index fdffa0391b31..6fd4e60634fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c @@ -350,6 +350,8 @@ nvkm_fifo_dtor(struct nvkm_engine *engine) nvkm_chid_unref(&fifo->chid); nvkm_event_fini(&fifo->nonstall.event); + if (fifo->func->nonstall_dtor) + fifo->func->nonstall_dtor(fifo); mutex_destroy(&fifo->mutex); if (fifo->func->dtor) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c index e74493a4569e..6848a56f20c0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c @@ -517,19 +517,11 @@ ga100_fifo_nonstall_intr(struct nvkm_inth *inth) static void ga100_fifo_nonstall_block(struct nvkm_event *event, int type, int index) { - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); - struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0); - - nvkm_inth_block(&runl->nonstall.inth); } static void ga100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index) { - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); - struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0); - - nvkm_inth_allow(&runl->nonstall.inth); } const struct nvkm_event_func @@ -564,12 +556,26 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo) if (ret) return ret; + nvkm_inth_allow(&runl->nonstall.inth); + nr = max(nr, runl->id + 1); } return nr; } +void +ga100_fifo_nonstall_dtor(struct nvkm_fifo *fifo) +{ + struct nvkm_runl *runl; + + nvkm_runl_foreach(runl, fifo) { + if (runl->nonstall.vector < 0) + continue; + nvkm_inth_block(&runl->nonstall.inth); + } +} + int ga100_fifo_runl_ctor(struct nvkm_fifo *fifo) { @@ -599,6 +605,7 @@ ga100_fifo = { .runl_ctor = ga100_fifo_runl_ctor, .mmu_fault = &tu102_fifo_mmu_fault, .nonstall_ctor = ga100_fifo_nonstall_ctor, + .nonstall_dtor = ga100_fifo_nonstall_dtor, .nonstall = &ga100_fifo_nonstall, .runl = &ga100_runl, .runq = &ga100_runq, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c index 755235f55b3a..18a0b1f4eab7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c @@ -30,6 +30,7 @@ ga102_fifo = { .runl_ctor = ga100_fifo_runl_ctor, .mmu_fault = &tu102_fifo_mmu_fault, .nonstall_ctor = ga100_fifo_nonstall_ctor, + .nonstall_dtor = ga100_fifo_nonstall_dtor, .nonstall = &ga100_fifo_nonstall, .runl = &ga100_runl, .runq = &ga100_runq, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h index 
5e81ae195329..fff1428ef267 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h @@ -41,6 +41,7 @@ struct nvkm_fifo_func { void (*start)(struct nvkm_fifo *, unsigned long *); int (*nonstall_ctor)(struct nvkm_fifo *); + void (*nonstall_dtor)(struct nvkm_fifo *); const struct nvkm_event_func *nonstall; const struct nvkm_runl_func *runl; @@ -200,6 +201,7 @@ u32 tu102_chan_doorbell_handle(struct nvkm_chan *); int ga100_fifo_runl_ctor(struct nvkm_fifo *); int ga100_fifo_nonstall_ctor(struct nvkm_fifo *); +void ga100_fifo_nonstall_dtor(struct nvkm_fifo *); extern const struct nvkm_event_func ga100_fifo_nonstall; extern const struct nvkm_runl_func ga100_runl; extern const struct nvkm_runq_func ga100_runq; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c index b7da3ab44c27..7c43397c19e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c @@ -103,7 +103,7 @@ gm200_flcn_pio_imem_wr_init(struct nvkm_falcon *falcon, u8 port, bool sec, u32 i static void gm200_flcn_pio_imem_wr(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len, u16 tag) { - nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag++); + nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag); while (len >= 4) { nvkm_falcon_wr32(falcon, 0x184 + (port * 0x10), *(u32 *)img); img += 4; @@ -249,9 +249,11 @@ int gm200_flcn_fw_load(struct nvkm_falcon_fw *fw) { struct nvkm_falcon *falcon = fw->falcon; - int target, ret; + int ret; if (fw->inst) { + int target; + nvkm_falcon_mask(falcon, 0x048, 0x00000001, 0x00000001); switch (nvkm_memory_target(fw->inst)) { @@ -285,15 +287,6 @@ gm200_flcn_fw_load(struct nvkm_falcon_fw *fw) } if (fw->boot) { - switch (nvkm_memory_target(&fw->fw.mem.memory)) { - case NVKM_MEM_TARGET_VRAM: target = 4; break; - case NVKM_MEM_TARGET_HOST: target = 5; break; - case NVKM_MEM_TARGET_NCOH: target = 6; break; - default: - WARN_ON(1); - return -EINVAL; - } - ret = nvkm_falcon_pio_wr(falcon, fw->boot, 0, 0, IMEM, falcon->code.limit - fw->boot_size, fw->boot_size, fw->boot_addr >> 8, false); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c index 52412965fac1..5b721bd9d799 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c @@ -209,11 +209,12 @@ nvkm_gsp_fwsec_v2(struct nvkm_gsp *gsp, const char *name, fw->boot_addr = bld->start_tag << 8; fw->boot_size = bld->code_size; fw->boot = kmemdup(bl->data + hdr->data_offset + bld->code_off, fw->boot_size, GFP_KERNEL); - if (!fw->boot) - ret = -ENOMEM; nvkm_firmware_put(bl); + if (!fw->boot) + return -ENOMEM; + /* Patch in interface data. 
*/ return nvkm_gsp_fwsec_patch(gsp, fw, desc->InterfaceOffset, init_cmd); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c index 1ac5628c5140..4ed54b386a60 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c @@ -601,6 +601,7 @@ r535_fifo_new(const struct nvkm_fifo_func *hw, struct nvkm_device *device, rm->chan.func = &r535_chan; rm->nonstall = &ga100_fifo_nonstall; rm->nonstall_ctor = ga100_fifo_nonstall_ctor; + rm->nonstall_dtor = ga100_fifo_nonstall_dtor; return nvkm_fifo_new_(rm, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c index 9d06ff722fea..0dc4782df8c0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c @@ -325,7 +325,7 @@ r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 gsp_rpc_len, int *retries) rpc = r535_gsp_msgq_peek(gsp, sizeof(*rpc), info.retries); if (IS_ERR_OR_NULL(rpc)) { - kfree(buf); + kvfree(buf); return rpc; } @@ -334,7 +334,7 @@ r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 gsp_rpc_len, int *retries) rpc = r535_gsp_msgq_recv_one_elem(gsp, &info); if (IS_ERR_OR_NULL(rpc)) { - kfree(buf); + kvfree(buf); return rpc; } diff --git a/drivers/gpu/drm/nova/file.rs b/drivers/gpu/drm/nova/file.rs index 7e7d4e2de2fb..90b9d2d0ec4a 100644 --- a/drivers/gpu/drm/nova/file.rs +++ b/drivers/gpu/drm/nova/file.rs @@ -36,7 +36,7 @@ impl File { _ => return Err(EINVAL), }; - getparam.value = value; + getparam.value = Into::<u64>::into(value); Ok(0) } diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 054b71dba6a7..794267f0f007 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -378,10 +378,8 @@ static int omap_display_id(struct omap_dss_device *output) struct device_node *node = NULL; if (output->bridge) { - struct drm_bridge *bridge = output->bridge; - - while (drm_bridge_get_next_bridge(bridge)) - bridge = drm_bridge_get_next_bridge(bridge); + struct drm_bridge *bridge __free(drm_bridge_put) = + drm_bridge_chain_get_last_bridge(output->bridge->encoder); node = bridge->of_node; } diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index feba520a0f45..62435e3cd9f4 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1843,6 +1843,13 @@ static const struct panel_delay delay_200_500_e50_d100 = { .disable = 100, }; +static const struct panel_delay delay_80_500_e50_d50 = { + .hpd_absent = 80, + .unprepare = 500, + .enable = 50, + .disable = 50, +}; + #define EDP_PANEL_ENTRY(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name) \ { \ .ident = { \ @@ -1955,6 +1962,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a36, &delay_200_500_e200, "Unknown"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a3e, &delay_200_500_e80_d50, "NV116WHM-N49"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a5d, &delay_200_500_e50, "NV116WHM-N45"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a6a, &delay_200_500_e80, "NV140WUM-N44"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ac5, &delay_200_500_e50, "NV116WHM-N4C"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ae8, &delay_200_500_e50_p2e80, "NV140WUM-N41"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b09, &delay_200_500_e50_po2e200, "NV140FHM-NZ"), @@ -1996,6 +2004,7 @@ static const struct edp_panel_entry 
edp_panels[] = { EDP_PANEL_ENTRY('C', 'M', 'N', 0x124c, &delay_200_500_e80_d50, "N122JCA-ENK"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x142b, &delay_200_500_e80_d50, "N140HCA-EAC"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x142e, &delay_200_500_e80_d50, "N140BGA-EA4"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x1441, &delay_200_500_e80_d50, "N140JCA-ELK"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x144f, &delay_200_500_e80_d50, "N140HGA-EA1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1468, &delay_200_500_e80, "N140HGA-EA1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x14a8, &delay_200_500_e80, "N140JCA-ELP"), @@ -2011,10 +2020,12 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('C', 'S', 'W', 0x1100, &delay_200_500_e80_d50, "MNB601LS1-1"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1103, &delay_200_500_e80_d50, "MNB601LS1-3"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1104, &delay_200_500_e50_d100, "MNB601LS1-4"), + EDP_PANEL_ENTRY('C', 'S', 'W', 0x143f, &delay_200_500_e50, "MNE007QS3-6"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1448, &delay_200_500_e50, "MNE007QS3-7"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1457, &delay_80_500_e80_p2e200, "MNE007QS3-8"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1462, &delay_200_500_e50, "MNE007QS5-2"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1468, &delay_200_500_e50, "MNE007QB2-2"), + EDP_PANEL_ENTRY('C', 'S', 'W', 0x146e, &delay_80_500_e50_d50, "MNE007QB3-1"), EDP_PANEL_ENTRY('E', 'T', 'C', 0x0000, &delay_50_500_e200_d200_po2e335, "LP079QX1-SP0V"), diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c index ac433345a179..ad4993b2f92a 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c @@ -1417,6 +1417,200 @@ static const struct ili9881c_instr rpi_7inch_init[] = { ILI9881C_COMMAND_INSTR(0xD3, 0x39), }; +static const struct ili9881c_instr bsd1218_a101kl68_init[] = { + ILI9881C_SWITCH_PAGE_INSTR(3), + ILI9881C_COMMAND_INSTR(0x01, 0x00), + ILI9881C_COMMAND_INSTR(0x02, 0x00), + ILI9881C_COMMAND_INSTR(0x03, 0x55), + ILI9881C_COMMAND_INSTR(0x04, 0x55), + ILI9881C_COMMAND_INSTR(0x05, 0x03), + ILI9881C_COMMAND_INSTR(0x06, 0x06), + ILI9881C_COMMAND_INSTR(0x07, 0x00), + ILI9881C_COMMAND_INSTR(0x08, 0x07), + ILI9881C_COMMAND_INSTR(0x09, 0x00), + ILI9881C_COMMAND_INSTR(0x0a, 0x00), + ILI9881C_COMMAND_INSTR(0x0b, 0x00), + ILI9881C_COMMAND_INSTR(0x0c, 0x00), + ILI9881C_COMMAND_INSTR(0x0d, 0x00), + ILI9881C_COMMAND_INSTR(0x0e, 0x00), + ILI9881C_COMMAND_INSTR(0x0f, 0x00), + ILI9881C_COMMAND_INSTR(0x10, 0x00), + ILI9881C_COMMAND_INSTR(0x11, 0x00), + ILI9881C_COMMAND_INSTR(0x12, 0x00), + ILI9881C_COMMAND_INSTR(0x13, 0x00), + ILI9881C_COMMAND_INSTR(0x14, 0x00), + ILI9881C_COMMAND_INSTR(0x15, 0x00), + ILI9881C_COMMAND_INSTR(0x16, 0x00), + ILI9881C_COMMAND_INSTR(0x17, 0x00), + ILI9881C_COMMAND_INSTR(0x18, 0x00), + ILI9881C_COMMAND_INSTR(0x19, 0x00), + ILI9881C_COMMAND_INSTR(0x1a, 0x00), + ILI9881C_COMMAND_INSTR(0x1b, 0x00), + ILI9881C_COMMAND_INSTR(0x1c, 0x00), + ILI9881C_COMMAND_INSTR(0x1d, 0x00), + ILI9881C_COMMAND_INSTR(0x1e, 0xc0), + ILI9881C_COMMAND_INSTR(0x1f, 0x80), + ILI9881C_COMMAND_INSTR(0x20, 0x04), + ILI9881C_COMMAND_INSTR(0x21, 0x03), + ILI9881C_COMMAND_INSTR(0x22, 0x00), + ILI9881C_COMMAND_INSTR(0x23, 0x00), + ILI9881C_COMMAND_INSTR(0x24, 0x00), + ILI9881C_COMMAND_INSTR(0x25, 0x00), + ILI9881C_COMMAND_INSTR(0x26, 0x00), + ILI9881C_COMMAND_INSTR(0x27, 0x00), + ILI9881C_COMMAND_INSTR(0x28, 0x33), + ILI9881C_COMMAND_INSTR(0x29, 0x33), + ILI9881C_COMMAND_INSTR(0x2a, 0x00), + ILI9881C_COMMAND_INSTR(0x2b, 0x00), + 
ILI9881C_COMMAND_INSTR(0x2c, 0x00), + ILI9881C_COMMAND_INSTR(0x2d, 0x00), + ILI9881C_COMMAND_INSTR(0x2e, 0x00), + ILI9881C_COMMAND_INSTR(0x2f, 0x00), + ILI9881C_COMMAND_INSTR(0x30, 0x00), + ILI9881C_COMMAND_INSTR(0x31, 0x00), + ILI9881C_COMMAND_INSTR(0x32, 0x00), + ILI9881C_COMMAND_INSTR(0x33, 0x00), + ILI9881C_COMMAND_INSTR(0x34, 0x04), + ILI9881C_COMMAND_INSTR(0x35, 0x00), + ILI9881C_COMMAND_INSTR(0x36, 0x00), + ILI9881C_COMMAND_INSTR(0x37, 0x00), + ILI9881C_COMMAND_INSTR(0x38, 0x3c), + ILI9881C_COMMAND_INSTR(0x39, 0x00), + ILI9881C_COMMAND_INSTR(0x3a, 0x00), + ILI9881C_COMMAND_INSTR(0x3b, 0x00), + ILI9881C_COMMAND_INSTR(0x3c, 0x00), + ILI9881C_COMMAND_INSTR(0x3d, 0x00), + ILI9881C_COMMAND_INSTR(0x3e, 0x00), + ILI9881C_COMMAND_INSTR(0x3f, 0x00), + ILI9881C_COMMAND_INSTR(0x40, 0x00), + ILI9881C_COMMAND_INSTR(0x41, 0x00), + ILI9881C_COMMAND_INSTR(0x42, 0x00), + ILI9881C_COMMAND_INSTR(0x43, 0x00), + ILI9881C_COMMAND_INSTR(0x44, 0x00), + ILI9881C_COMMAND_INSTR(0x50, 0x00), + ILI9881C_COMMAND_INSTR(0x51, 0x11), + ILI9881C_COMMAND_INSTR(0x52, 0x44), + ILI9881C_COMMAND_INSTR(0x53, 0x55), + ILI9881C_COMMAND_INSTR(0x54, 0x88), + ILI9881C_COMMAND_INSTR(0x55, 0xab), + ILI9881C_COMMAND_INSTR(0x56, 0x00), + ILI9881C_COMMAND_INSTR(0x57, 0x11), + ILI9881C_COMMAND_INSTR(0x58, 0x22), + ILI9881C_COMMAND_INSTR(0x59, 0x33), + ILI9881C_COMMAND_INSTR(0x5a, 0x44), + ILI9881C_COMMAND_INSTR(0x5b, 0x55), + ILI9881C_COMMAND_INSTR(0x5c, 0x66), + ILI9881C_COMMAND_INSTR(0x5d, 0x77), + ILI9881C_COMMAND_INSTR(0x5e, 0x00), + ILI9881C_COMMAND_INSTR(0x5f, 0x02), + ILI9881C_COMMAND_INSTR(0x60, 0x02), + ILI9881C_COMMAND_INSTR(0x61, 0x0a), + ILI9881C_COMMAND_INSTR(0x62, 0x09), + ILI9881C_COMMAND_INSTR(0x63, 0x08), + ILI9881C_COMMAND_INSTR(0x64, 0x13), + ILI9881C_COMMAND_INSTR(0x65, 0x12), + ILI9881C_COMMAND_INSTR(0x66, 0x11), + ILI9881C_COMMAND_INSTR(0x67, 0x10), + ILI9881C_COMMAND_INSTR(0x68, 0x0f), + ILI9881C_COMMAND_INSTR(0x69, 0x0e), + ILI9881C_COMMAND_INSTR(0x6a, 0x0d), + ILI9881C_COMMAND_INSTR(0x6b, 0x0c), + ILI9881C_COMMAND_INSTR(0x6c, 0x06), + ILI9881C_COMMAND_INSTR(0x6d, 0x07), + ILI9881C_COMMAND_INSTR(0x6e, 0x02), + ILI9881C_COMMAND_INSTR(0x6f, 0x02), + ILI9881C_COMMAND_INSTR(0x70, 0x02), + ILI9881C_COMMAND_INSTR(0x71, 0x02), + ILI9881C_COMMAND_INSTR(0x72, 0x02), + ILI9881C_COMMAND_INSTR(0x73, 0x02), + ILI9881C_COMMAND_INSTR(0x74, 0x02), + ILI9881C_COMMAND_INSTR(0x75, 0x02), + ILI9881C_COMMAND_INSTR(0x76, 0x02), + ILI9881C_COMMAND_INSTR(0x77, 0x0a), + ILI9881C_COMMAND_INSTR(0x78, 0x06), + ILI9881C_COMMAND_INSTR(0x79, 0x07), + ILI9881C_COMMAND_INSTR(0x7a, 0x10), + ILI9881C_COMMAND_INSTR(0x7b, 0x11), + ILI9881C_COMMAND_INSTR(0x7c, 0x12), + ILI9881C_COMMAND_INSTR(0x7d, 0x13), + ILI9881C_COMMAND_INSTR(0x7e, 0x0c), + ILI9881C_COMMAND_INSTR(0x7f, 0x0d), + ILI9881C_COMMAND_INSTR(0x80, 0x0e), + ILI9881C_COMMAND_INSTR(0x81, 0x0f), + ILI9881C_COMMAND_INSTR(0x82, 0x09), + ILI9881C_COMMAND_INSTR(0x83, 0x08), + ILI9881C_COMMAND_INSTR(0x84, 0x02), + ILI9881C_COMMAND_INSTR(0x85, 0x02), + ILI9881C_COMMAND_INSTR(0x86, 0x02), + ILI9881C_COMMAND_INSTR(0x87, 0x02), + ILI9881C_COMMAND_INSTR(0x88, 0x02), + ILI9881C_COMMAND_INSTR(0x89, 0x02), + ILI9881C_COMMAND_INSTR(0x8a, 0x02), + + ILI9881C_SWITCH_PAGE_INSTR(4), + ILI9881C_COMMAND_INSTR(0x6e, 0x2a), + ILI9881C_COMMAND_INSTR(0x6f, 0x37), + ILI9881C_COMMAND_INSTR(0x3a, 0x24), + ILI9881C_COMMAND_INSTR(0x8d, 0x19), + ILI9881C_COMMAND_INSTR(0x87, 0xba), + ILI9881C_COMMAND_INSTR(0xb2, 0xd1), + ILI9881C_COMMAND_INSTR(0x88, 0x0b), + ILI9881C_COMMAND_INSTR(0x38, 0x01), + 
ILI9881C_COMMAND_INSTR(0x39, 0x00), + ILI9881C_COMMAND_INSTR(0xb5, 0x02), + ILI9881C_COMMAND_INSTR(0x31, 0x25), + ILI9881C_COMMAND_INSTR(0x3b, 0x98), + + ILI9881C_SWITCH_PAGE_INSTR(1), + ILI9881C_COMMAND_INSTR(0x22, 0x0a), + ILI9881C_COMMAND_INSTR(0x31, 0x0c), + ILI9881C_COMMAND_INSTR(0x53, 0x40), + ILI9881C_COMMAND_INSTR(0x55, 0x45), + ILI9881C_COMMAND_INSTR(0x50, 0xb7), + ILI9881C_COMMAND_INSTR(0x51, 0xb2), + ILI9881C_COMMAND_INSTR(0x60, 0x07), + ILI9881C_COMMAND_INSTR(0xa0, 0x22), + ILI9881C_COMMAND_INSTR(0xa1, 0x3f), + ILI9881C_COMMAND_INSTR(0xa2, 0x4e), + ILI9881C_COMMAND_INSTR(0xa3, 0x17), + ILI9881C_COMMAND_INSTR(0xa4, 0x1a), + ILI9881C_COMMAND_INSTR(0xa5, 0x2d), + ILI9881C_COMMAND_INSTR(0xa6, 0x21), + ILI9881C_COMMAND_INSTR(0xa7, 0x22), + ILI9881C_COMMAND_INSTR(0xa8, 0xc4), + ILI9881C_COMMAND_INSTR(0xa9, 0x1b), + ILI9881C_COMMAND_INSTR(0xaa, 0x25), + ILI9881C_COMMAND_INSTR(0xab, 0xa7), + ILI9881C_COMMAND_INSTR(0xac, 0x1a), + ILI9881C_COMMAND_INSTR(0xad, 0x19), + ILI9881C_COMMAND_INSTR(0xae, 0x4b), + ILI9881C_COMMAND_INSTR(0xaf, 0x1f), + ILI9881C_COMMAND_INSTR(0xb0, 0x2a), + ILI9881C_COMMAND_INSTR(0xb1, 0x59), + ILI9881C_COMMAND_INSTR(0xb2, 0x64), + ILI9881C_COMMAND_INSTR(0xb3, 0x3f), + ILI9881C_COMMAND_INSTR(0xc0, 0x22), + ILI9881C_COMMAND_INSTR(0xc1, 0x48), + ILI9881C_COMMAND_INSTR(0xc2, 0x59), + ILI9881C_COMMAND_INSTR(0xc3, 0x15), + ILI9881C_COMMAND_INSTR(0xc4, 0x15), + ILI9881C_COMMAND_INSTR(0xc5, 0x28), + ILI9881C_COMMAND_INSTR(0xc6, 0x1c), + ILI9881C_COMMAND_INSTR(0xc7, 0x1e), + ILI9881C_COMMAND_INSTR(0xc8, 0xc4), + ILI9881C_COMMAND_INSTR(0xc9, 0x1c), + ILI9881C_COMMAND_INSTR(0xca, 0x2b), + ILI9881C_COMMAND_INSTR(0xcb, 0xa3), + ILI9881C_COMMAND_INSTR(0xcc, 0x1f), + ILI9881C_COMMAND_INSTR(0xcd, 0x1e), + ILI9881C_COMMAND_INSTR(0xce, 0x52), + ILI9881C_COMMAND_INSTR(0xcf, 0x24), + ILI9881C_COMMAND_INSTR(0xd0, 0x2a), + ILI9881C_COMMAND_INSTR(0xd1, 0x58), + ILI9881C_COMMAND_INSTR(0xd2, 0x68), + ILI9881C_COMMAND_INSTR(0xd3, 0x3f), +}; + static inline struct ili9881c *panel_to_ili9881c(struct drm_panel *panel) { return container_of(panel, struct ili9881c, panel); @@ -1433,33 +1627,24 @@ static inline struct ili9881c *panel_to_ili9881c(struct drm_panel *panel) * So before any attempt at sending a command or data, we have to be * sure if we're in the right page or not. 
*/ -static int ili9881c_switch_page(struct ili9881c *ctx, u8 page) +static void ili9881c_switch_page(struct mipi_dsi_multi_context *mctx, u8 page) { u8 buf[4] = { 0xff, 0x98, 0x81, page }; - int ret; - - ret = mipi_dsi_dcs_write_buffer(ctx->dsi, buf, sizeof(buf)); - if (ret < 0) - return ret; - return 0; + mipi_dsi_dcs_write_buffer_multi(mctx, buf, sizeof(buf)); } -static int ili9881c_send_cmd_data(struct ili9881c *ctx, u8 cmd, u8 data) +static void ili9881c_send_cmd_data(struct mipi_dsi_multi_context *mctx, u8 cmd, u8 data) { u8 buf[2] = { cmd, data }; - int ret; - - ret = mipi_dsi_dcs_write_buffer(ctx->dsi, buf, sizeof(buf)); - if (ret < 0) - return ret; - return 0; + mipi_dsi_dcs_write_buffer_multi(mctx, buf, sizeof(buf)); } static int ili9881c_prepare(struct drm_panel *panel) { struct ili9881c *ctx = panel_to_ili9881c(panel); + struct mipi_dsi_multi_context mctx = { .dsi = ctx->dsi }; unsigned int i; int ret; @@ -1480,61 +1665,39 @@ static int ili9881c_prepare(struct drm_panel *panel) const struct ili9881c_instr *instr = &ctx->desc->init[i]; if (instr->op == ILI9881C_SWITCH_PAGE) - ret = ili9881c_switch_page(ctx, instr->arg.page); + ili9881c_switch_page(&mctx, instr->arg.page); else if (instr->op == ILI9881C_COMMAND) - ret = ili9881c_send_cmd_data(ctx, instr->arg.cmd.cmd, - instr->arg.cmd.data); - - if (ret) - return ret; + ili9881c_send_cmd_data(&mctx, instr->arg.cmd.cmd, + instr->arg.cmd.data); } - ret = ili9881c_switch_page(ctx, 0); - if (ret) - return ret; - - if (ctx->address_mode) { - ret = mipi_dsi_dcs_write(ctx->dsi, MIPI_DCS_SET_ADDRESS_MODE, - &ctx->address_mode, - sizeof(ctx->address_mode)); - if (ret < 0) - return ret; - } - - ret = mipi_dsi_dcs_set_tear_on(ctx->dsi, MIPI_DSI_DCS_TEAR_MODE_VBLANK); - if (ret) - return ret; - - ret = mipi_dsi_dcs_exit_sleep_mode(ctx->dsi); - if (ret) - return ret; + ili9881c_switch_page(&mctx, 0); - return 0; -} + if (ctx->address_mode) + ili9881c_send_cmd_data(&mctx, MIPI_DCS_SET_ADDRESS_MODE, + ctx->address_mode); -static int ili9881c_enable(struct drm_panel *panel) -{ - struct ili9881c *ctx = panel_to_ili9881c(panel); - - msleep(120); - - mipi_dsi_dcs_set_display_on(ctx->dsi); + mipi_dsi_dcs_set_tear_on_multi(&mctx, MIPI_DSI_DCS_TEAR_MODE_VBLANK); + mipi_dsi_dcs_exit_sleep_mode_multi(&mctx); + mipi_dsi_msleep(&mctx, 120); + mipi_dsi_dcs_set_display_on_multi(&mctx); + if (mctx.accum_err) + goto disable_power; return 0; -} -static int ili9881c_disable(struct drm_panel *panel) -{ - struct ili9881c *ctx = panel_to_ili9881c(panel); - - return mipi_dsi_dcs_set_display_off(ctx->dsi); +disable_power: + regulator_disable(ctx->power); + return mctx.accum_err; } static int ili9881c_unprepare(struct drm_panel *panel) { struct ili9881c *ctx = panel_to_ili9881c(panel); + struct mipi_dsi_multi_context mctx = { .dsi = ctx->dsi }; - mipi_dsi_dcs_enter_sleep_mode(ctx->dsi); + mipi_dsi_dcs_set_display_off_multi(&mctx); + mipi_dsi_dcs_enter_sleep_mode_multi(&mctx); regulator_disable(ctx->power); gpiod_set_value_cansleep(ctx->reset, 1); @@ -1660,6 +1823,23 @@ static const struct drm_display_mode rpi_7inch_default_mode = { .height_mm = 151, }; +static const struct drm_display_mode bsd1218_a101kl68_default_mode = { + .clock = 70000, + + .hdisplay = 800, + .hsync_start = 800 + 40, + .hsync_end = 800 + 40 + 20, + .htotal = 800 + 40 + 20 + 20, + + .vdisplay = 1280, + .vsync_start = 1280 + 20, + .vsync_end = 1280 + 20 + 4, + .vtotal = 1280 + 20 + 4 + 20, + + .width_mm = 120, + .height_mm = 170, +}; + static int ili9881c_get_modes(struct drm_panel *panel, struct 
drm_connector *connector) { @@ -1706,8 +1886,6 @@ static enum drm_panel_orientation ili9881c_get_orientation(struct drm_panel *pan static const struct drm_panel_funcs ili9881c_funcs = { .prepare = ili9881c_prepare, .unprepare = ili9881c_unprepare, - .enable = ili9881c_enable, - .disable = ili9881c_disable, .get_modes = ili9881c_get_modes, .get_orientation = ili9881c_get_orientation, }; @@ -1830,8 +2008,18 @@ static const struct ili9881c_desc rpi_7inch_desc = { .lanes = 2, }; +static const struct ili9881c_desc bsd1218_a101kl68_desc = { + .init = bsd1218_a101kl68_init, + .init_length = ARRAY_SIZE(bsd1218_a101kl68_init), + .mode = &bsd1218_a101kl68_default_mode, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET, + .lanes = 4, +}; + static const struct of_device_id ili9881c_of_match[] = { { .compatible = "bananapi,lhr050h41", .data = &lhr050h41_desc }, + { .compatible = "bestar,bsd1218-a101kl68", .data = &bsd1218_a101kl68_desc }, { .compatible = "feixin,k101-im2byl02", .data = &k101_im2byl02_desc }, { .compatible = "startek,kd050hdfia020", .data = &kd050hdfia020_desc }, { .compatible = "tdo,tl050hdv35", .data = &tl050hdv35_desc }, diff --git a/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c b/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c index 83656bb4b0b2..23462065d726 100644 --- a/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c +++ b/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c @@ -233,8 +233,10 @@ static int jdi_panel_prepare(struct drm_panel *panel) mipi_dsi_dual(mipi_dsi_dcs_set_display_on_multi, &dsi_ctx, jdi->link1, jdi->link2); - if (dsi_ctx.accum_err < 0) + if (dsi_ctx.accum_err < 0) { + err = dsi_ctx.accum_err; goto poweroff; + } jdi->link1->mode_flags &= ~MIPI_DSI_MODE_LPM; jdi->link2->mode_flags &= ~MIPI_DSI_MODE_LPM; diff --git a/drivers/gpu/drm/panel/panel-lvds.c b/drivers/gpu/drm/panel/panel-lvds.c index 23fd535d8f47..46b07f38559f 100644 --- a/drivers/gpu/drm/panel/panel-lvds.c +++ b/drivers/gpu/drm/panel/panel-lvds.c @@ -28,8 +28,6 @@ struct panel_lvds { struct device *dev; const char *label; - unsigned int width; - unsigned int height; struct drm_display_mode dmode; u32 bus_flags; unsigned int bus_format; diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 9256806eb662..4c202fc5ce05 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1094,7 +1094,7 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, struct drm_panthor_queue_create *queue_args; int ret; - if (!args->queues.count) + if (!args->queues.count || args->queues.count > MAX_CS_PER_CSG) return -EINVAL; ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues); diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index a123bc740ba1..156c7a0b62a2 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -74,7 +74,6 @@ static void panthor_gem_free_object(struct drm_gem_object *obj) mutex_destroy(&bo->label.lock); drm_gem_free_mmap_offset(&bo->base.base); - mutex_destroy(&bo->gpuva_list_lock); drm_gem_shmem_free(&bo->base); drm_gem_object_put(vm_root_gem); } @@ -246,8 +245,6 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t obj->base.base.funcs = &panthor_gem_funcs; obj->base.map_wc = !ptdev->coherent; - mutex_init(&obj->gpuva_list_lock); - drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock); mutex_init(&obj->label.lock); 
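As an aside on the panthor changes just above and below (not part of the patch): the driver-private gpuva_list_lock and its drm_gem_gpuva_set_lock() registration are dropped here, and the later panthor_mmu.c hunks instead take the lock embedded in the GEM object itself (bo->base.base.gpuva.lock) around drm_gpuva_link()/drm_gpuva_unlink(), while the VM is now created with DRM_GPUVM_IMMEDIATE_MODE. A minimal sketch of the resulting link-side locking pattern follows, assuming the existing panthor_vma and panthor_gem_object types; only the helper name is invented:

/*
 * Sketch only: GPUVA list insertion under the GEM object's own gpuva.lock,
 * matching the panthor_vma_link() hunk later in this diff.
 */
static void example_vma_link(struct panthor_vma *vma, struct drm_gpuvm_bo *vm_bo)
{
	struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);

	mutex_lock(&bo->base.base.gpuva.lock);
	drm_gpuva_link(&vma->base, vm_bo);
	mutex_unlock(&bo->base.base.gpuva.lock);
}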
panthor_gem_debugfs_bo_init(obj); diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index 8fc7215e9b90..80c6e24112d0 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -79,18 +79,6 @@ struct panthor_gem_object { */ struct drm_gem_object *exclusive_vm_root_gem; - /** - * @gpuva_list_lock: Custom GPUVA lock. - * - * Used to protect insertion of drm_gpuva elements to the - * drm_gem_object.gpuva.list list. - * - * We can't use the GEM resv for that, because drm_gpuva_link() is - * called in a dma-signaling path, where we're not allowed to take - * resv locks. - */ - struct mutex gpuva_list_lock; - /** @flags: Combination of drm_panthor_bo_flags flags. */ u32 flags; diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 2003b91a8409..6dec4354e378 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -569,15 +569,37 @@ static void lock_region(struct panthor_device *ptdev, u32 as_nr, write_cmd(ptdev, as_nr, AS_COMMAND_LOCK); } -static int mmu_hw_do_flush_on_gpu_ctrl(struct panthor_device *ptdev, int as_nr, - u32 op) +static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, + u64 iova, u64 size, u32 op) { const u32 l2_flush_op = CACHE_CLEAN | CACHE_INV; - u32 lsc_flush_op = 0; + u32 lsc_flush_op; int ret; - if (op == AS_COMMAND_FLUSH_MEM) + lockdep_assert_held(&ptdev->mmu->as.slots_lock); + + switch (op) { + case AS_COMMAND_FLUSH_MEM: lsc_flush_op = CACHE_CLEAN | CACHE_INV; + break; + case AS_COMMAND_FLUSH_PT: + lsc_flush_op = 0; + break; + default: + drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op); + return -EINVAL; + } + + if (as_nr < 0) + return 0; + + /* + * If the AS number is greater than zero, then we can be sure + * the device is up and running, so we don't need to explicitly + * power it up + */ + + lock_region(ptdev, as_nr, iova, size); ret = wait_ready(ptdev, as_nr); if (ret) @@ -598,33 +620,6 @@ static int mmu_hw_do_flush_on_gpu_ctrl(struct panthor_device *ptdev, int as_nr, return wait_ready(ptdev, as_nr); } -static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, - u64 iova, u64 size, u32 op) -{ - lockdep_assert_held(&ptdev->mmu->as.slots_lock); - - if (as_nr < 0) - return 0; - - /* - * If the AS number is greater than zero, then we can be sure - * the device is up and running, so we don't need to explicitly - * power it up - */ - - if (op != AS_COMMAND_UNLOCK) - lock_region(ptdev, as_nr, iova, size); - - if (op == AS_COMMAND_FLUSH_MEM || op == AS_COMMAND_FLUSH_PT) - return mmu_hw_do_flush_on_gpu_ctrl(ptdev, as_nr, op); - - /* Run the MMU operation */ - write_cmd(ptdev, as_nr, op); - - /* Wait for the flush to complete */ - return wait_ready(ptdev, as_nr); -} - static int mmu_hw_do_operation(struct panthor_vm *vm, u64 iova, u64 size, u32 op) { @@ -1107,9 +1102,9 @@ static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) * GEM vm_bo list. 
*/ dma_resv_lock(drm_gpuvm_resv(vm), NULL); - mutex_lock(&bo->gpuva_list_lock); + mutex_lock(&bo->base.base.gpuva.lock); unpin = drm_gpuvm_bo_put(vm_bo); - mutex_unlock(&bo->gpuva_list_lock); + mutex_unlock(&bo->base.base.gpuva.lock); dma_resv_unlock(drm_gpuvm_resv(vm)); /* If the vm_bo object was destroyed, release the pin reference that @@ -1227,7 +1222,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP) return -EINVAL; - /* Make sure the VA and size are aligned and in-bounds. */ + /* Make sure the VA and size are in-bounds. */ if (size > bo->base.base.size || offset > bo->base.base.size - size) return -EINVAL; @@ -1282,9 +1277,9 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, * calling this function. */ dma_resv_lock(panthor_vm_resv(vm), NULL); - mutex_lock(&bo->gpuva_list_lock); + mutex_lock(&bo->base.base.gpuva.lock); op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); - mutex_unlock(&bo->gpuva_list_lock); + mutex_unlock(&bo->base.base.gpuva.lock); dma_resv_unlock(panthor_vm_resv(vm)); /* If the a vm_bo for this <VM,BO> combination exists, it already @@ -2036,10 +2031,10 @@ static void panthor_vma_link(struct panthor_vm *vm, { struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); - mutex_lock(&bo->gpuva_list_lock); + mutex_lock(&bo->base.base.gpuva.lock); drm_gpuva_link(&vma->base, vm_bo); drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo)); - mutex_unlock(&bo->gpuva_list_lock); + mutex_unlock(&bo->base.base.gpuva.lock); } static void panthor_vma_unlink(struct panthor_vm *vm, @@ -2048,9 +2043,9 @@ static void panthor_vma_unlink(struct panthor_vm *vm, struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); - mutex_lock(&bo->gpuva_list_lock); + mutex_lock(&bo->base.base.gpuva.lock); drm_gpuva_unlink(&vma->base); - mutex_unlock(&bo->gpuva_list_lock); + mutex_unlock(&bo->base.base.gpuva.lock); /* drm_gpuva_unlink() release the vm_bo, but we manually retained it * when entering this function, so we can implement deferred VMA @@ -2420,8 +2415,9 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, * to be handled the same way user VMAs are. */ drm_gpuvm_init(&vm->base, for_mcu ? "panthor-MCU-VM" : "panthor-GPU-VM", - DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem, - min_va, va_range, 0, 0, &panthor_gpuvm_ops); + DRM_GPUVM_RESV_PROTECTED | DRM_GPUVM_IMMEDIATE_MODE, + &ptdev->base, dummy_gem, min_va, va_range, 0, 0, + &panthor_gpuvm_ops); drm_gem_object_put(dummy_gem); return vm; @@ -2451,7 +2447,7 @@ panthor_vm_bind_prepare_op_ctx(struct drm_file *file, int ret; /* Aligned on page size. */ - if (!IS_ALIGNED(op->va | op->size, vm_pgsz)) + if (!IS_ALIGNED(op->va | op->size | op->bo_offset, vm_pgsz)) return -EINVAL; switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index d1c5e471bdca..3d9f47bc807a 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1832,7 +1832,7 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode) return; } - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? 
true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 7c3a960f486a..ba8db1d07c07 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -2457,7 +2457,7 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev, u32 tmp, tmp2; tmp = RREG32(MC_SEQ_MISC0); - patch = ((tmp & 0x0000f00) == 0x300) ? true : false; + patch = (tmp & 0x0000f00) == 0x300; if (patch && ((rdev->pdev->device == 0x67B0) || @@ -3238,7 +3238,8 @@ static int ci_populate_all_graphic_levels(struct radeon_device *rdev) u32 level_array_size = sizeof(SMU7_Discrete_GraphicsLevel) * SMU7_MAX_LEVELS_GRAPHICS; SMU7_Discrete_GraphicsLevel *levels = pi->smc_state_table.GraphicsLevel; - u32 i, ret; + int ret; + u32 i; memset(levels, 0, level_array_size); @@ -3285,7 +3286,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev) u32 level_array_size = sizeof(SMU7_Discrete_MemoryLevel) * SMU7_MAX_LEVELS_MEMORY; SMU7_Discrete_MemoryLevel *levels = pi->smc_state_table.MemoryLevel; - u32 i, ret; + int ret; + u32 i; memset(levels, 0, level_array_size); @@ -3436,7 +3438,7 @@ static int ci_setup_default_dpm_tables(struct radeon_device *rdev) pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].value = allowed_sclk_vddc_table->entries[i].clk; pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].enabled = - (i == 0) ? true : false; + i == 0; pi->dpm_table.sclk_table.count++; } } @@ -3449,7 +3451,7 @@ static int ci_setup_default_dpm_tables(struct radeon_device *rdev) pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].value = allowed_mclk_table->entries[i].clk; pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].enabled = - (i == 0) ? true : false; + i == 0; pi->dpm_table.mclk_table.count++; } } @@ -4487,7 +4489,7 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev, bool patch; tmp = RREG32(MC_SEQ_MISC0); - patch = ((tmp & 0x0000f00) == 0x300) ? 
true : false; + patch = (tmp & 0x0000f00) == 0x300; if (patch && ((rdev->pdev->device == 0x67B0) || diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 266c57733136..1162cb5d75ed 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -951,13 +951,13 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) u64 offset = (u64)track->vgt_strmout_bo_offset[i] + (u64)track->vgt_strmout_size[i]; if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { - DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n", - i, offset, - radeon_bo_size(track->vgt_strmout_bo[i])); + dev_warn_once(p->dev, "streamout %d bo too small: 0x%llx, 0x%lx\n", + i, offset, + radeon_bo_size(track->vgt_strmout_bo[i])); return -EINVAL; } } else { - dev_warn(p->dev, "No buffer for streamout %d\n", i); + dev_warn_once(p->dev, "No buffer for streamout %d\n", i); return -EINVAL; } } @@ -979,8 +979,8 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) (tmp >> (i * 4)) & 0xF) { /* at least one component is enabled */ if (track->cb_color_bo[i] == NULL) { - dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", - __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); + dev_warn_once(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", + __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); return -EINVAL; } /* check cb */ @@ -1056,8 +1056,8 @@ static int evergreen_packet0_check(struct radeon_cs_parser *p, case EVERGREEN_VLINE_START_END: r = evergreen_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); return r; } break; @@ -1143,8 +1143,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_VSTMP_RING_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1155,15 +1155,15 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case CAYMAN_DB_EQAA: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } break; case CAYMAN_DB_DEPTH_INFO: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } break; @@ -1172,8 +1172,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] &= ~Z_ARRAY_MODE(0xf); @@ -1214,8 +1214,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_Z_READ_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_z_read_offset = radeon_get_ib_value(p, idx); @@ -1226,8 +1226,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 
reg, u32 idx) case DB_Z_WRITE_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_z_write_offset = radeon_get_ib_value(p, idx); @@ -1238,8 +1238,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_STENCIL_READ_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_s_read_offset = radeon_get_ib_value(p, idx); @@ -1250,8 +1250,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_STENCIL_WRITE_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_s_write_offset = radeon_get_ib_value(p, idx); @@ -1273,8 +1273,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case VGT_STRMOUT_BUFFER_BASE_3: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; @@ -1295,8 +1295,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CP_COHER_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "missing reloc for CP_COHER_BASE " - "0x%04X\n", reg); + dev_warn_once(p->dev, "missing reloc for CP_COHER_BASE " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1311,8 +1311,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case PA_SC_AA_CONFIG: if (p->rdev->family >= CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK; @@ -1320,8 +1320,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case CAYMAN_PA_SC_AA_CONFIG: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK; @@ -1360,8 +1360,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); @@ -1378,8 +1378,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); @@ -1439,8 +1439,8 @@ static int 
evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_ATTRIB: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { @@ -1467,8 +1467,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_ATTRIB: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { @@ -1555,8 +1555,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - CB_COLOR0_BASE) / 0x3c; @@ -1571,8 +1571,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; @@ -1584,8 +1584,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_HTILE_DATA_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->htile_offset = radeon_get_ib_value(p, idx); @@ -1702,36 +1702,36 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_ALU_CONST_CACHE_LS_15: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MEMORY_EXPORT_BASE: if (p->rdev->family >= CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONFIG_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONFIG_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case CAYMAN_SX_SCATTER_EXPORT_BASE: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1740,7 +1740,7 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; break; default: - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; 
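(For reference, a minimal sketch of the logging idiom these hunks converge on. dev_warn_once() prints a given message at most once, so validation failures that userspace can replay endlessly no longer flood the kernel log the way DRM_ERROR()/dev_warn() could. The helper name below is hypothetical; the device pointer and message match the ones already used in this file.)

	static int evergreen_reject_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
	{
		/* dev_warn_once() latches after the first print; later hits are silent */
		dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}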
} return 0; @@ -1795,7 +1795,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 1) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1807,13 +1807,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return 0; if (pred_op > 2) { - DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op); + dev_warn_once(p->dev, "bad SET PREDICATION operation %d\n", pred_op); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1827,7 +1827,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_CONTEXT_CONTROL: if (pkt->count != 1) { - DRM_ERROR("bad CONTEXT_CONTROL\n"); + dev_warn_once(p->dev, "bad CONTEXT_CONTROL\n"); return -EINVAL; } break; @@ -1835,17 +1835,17 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case PACKET3_NUM_INSTANCES: case PACKET3_CLEAR_STATE: if (pkt->count) { - DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); + dev_warn_once(p->dev, "bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); return -EINVAL; } break; case CAYMAN_PACKET3_DEALLOC_STATE: if (p->rdev->family < CHIP_CAYMAN) { - DRM_ERROR("bad PACKET3_DEALLOC_STATE\n"); + dev_warn_once(p->dev, "bad PACKET3_DEALLOC_STATE\n"); return -EINVAL; } if (pkt->count) { - DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); + dev_warn_once(p->dev, "bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); return -EINVAL; } break; @@ -1854,12 +1854,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 1) { - DRM_ERROR("bad INDEX_BASE\n"); + dev_warn_once(p->dev, "bad INDEX_BASE\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad INDEX_BASE\n"); + dev_warn_once(p->dev, "bad INDEX_BASE\n"); return -EINVAL; } @@ -1872,7 +1872,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; @@ -1880,7 +1880,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case PACKET3_INDEX_BUFFER_SIZE: { if (pkt->count != 0) { - DRM_ERROR("bad INDEX_BUFFER_SIZE\n"); + dev_warn_once(p->dev, "bad INDEX_BUFFER_SIZE\n"); return -EINVAL; } break; @@ -1889,12 +1889,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, { uint64_t offset; if (pkt->count != 3) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } @@ -1907,7 +1907,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; @@ -1917,12 +1917,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 4) { - DRM_ERROR("bad DRAW_INDEX_2\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_2\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DRAW_INDEX_2\n"); + dev_warn_once(p->dev, "bad 
DRAW_INDEX_2\n"); return -EINVAL; } @@ -1935,63 +1935,63 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; } case PACKET3_DRAW_INDEX_AUTO: if (pkt->count != 1) { - DRM_ERROR("bad DRAW_INDEX_AUTO\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_AUTO\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DRAW_INDEX_MULTI_AUTO: if (pkt->count != 2) { - DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_MULTI_AUTO\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DRAW_INDEX_IMMD: if (pkt->count < 2) { - DRM_ERROR("bad DRAW_INDEX_IMMD\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_IMMD\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_DRAW_INDEX_OFFSET: if (pkt->count != 2) { - DRM_ERROR("bad DRAW_INDEX_OFFSET\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_OFFSET\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_DRAW_INDEX_OFFSET_2: if (pkt->count != 3) { - DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_OFFSET_2\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; @@ -2005,19 +2005,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, 4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32] */ if (pkt->count != 2) { - DRM_ERROR("bad SET_BASE\n"); + dev_warn_once(p->dev, "bad SET_BASE\n"); return -EINVAL; } /* currently only supporting setting indirect draw buffer base address */ if (idx_value != 1) { - DRM_ERROR("bad SET_BASE\n"); + dev_warn_once(p->dev, "bad SET_BASE\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_BASE\n"); + dev_warn_once(p->dev, "bad SET_BASE\n"); return -EINVAL; } @@ -2039,54 +2039,54 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, 3 DRAW_INITIATOR Draw Initiator Register. 
Written to the VGT_DRAW_INITIATOR register for the assigned context */ if (pkt->count != 1) { - DRM_ERROR("bad DRAW_INDIRECT\n"); + dev_warn_once(p->dev, "bad DRAW_INDIRECT\n"); return -EINVAL; } if (idx_value + size > track->indirect_draw_buffer_size) { - dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n", - idx_value, size, track->indirect_draw_buffer_size); + dev_warn_once(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n", + idx_value, size, track->indirect_draw_buffer_size); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; } case PACKET3_DISPATCH_DIRECT: if (pkt->count != 3) { - DRM_ERROR("bad DISPATCH_DIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_DIRECT\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DISPATCH_INDIRECT: if (pkt->count != 1) { - DRM_ERROR("bad DISPATCH_INDIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_INDIRECT\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DISPATCH_INDIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_INDIRECT\n"); return -EINVAL; } ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff); r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_WAIT_REG_MEM: if (pkt->count != 5) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } /* bit 4 is reg (0) or mem (1) */ @@ -2095,7 +2095,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } @@ -2106,7 +2106,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc); ib[idx+2] = upper_32_bits(offset) & 0xff; } else if (idx_value & 0x100) { - DRM_ERROR("cannot use PFP on REG wait\n"); + dev_warn_once(p->dev, "cannot use PFP on REG wait\n"); return -EINVAL; } break; @@ -2115,7 +2115,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u32 command, size, info; u64 offset, tmp; if (pkt->count != 4) { - DRM_ERROR("bad CP DMA\n"); + dev_warn_once(p->dev, "bad CP DMA\n"); return -EINVAL; } command = radeon_get_ib_value(p, idx+4); @@ -2129,7 +2129,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ /* non mem to mem copies requires dw aligned count */ if (size % 4) { - DRM_ERROR("CP DMA command requires dw count alignment\n"); + dev_warn_once(p->dev, "CP DMA command requires dw count alignment\n"); return -EINVAL; } } @@ -2137,19 +2137,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* src address space is register */ /* GDS is ok */ if (((info & 0x60000000) >> 29) != 1) { - DRM_ERROR("CP DMA SAS not supported\n"); + dev_warn_once(p->dev, "CP DMA SAS not supported\n"); return -EINVAL; } } else { if (command & PACKET3_CP_DMA_CMD_SAIC) { - DRM_ERROR("CP DMA SAIC only supported for registers\n"); + 
dev_warn_once(p->dev, "CP DMA SAIC only supported for registers\n"); return -EINVAL; } /* src address space is memory */ if (((info & 0x60000000) >> 29) == 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad CP DMA SRC\n"); + dev_warn_once(p->dev, "bad CP DMA SRC\n"); return -EINVAL; } @@ -2159,15 +2159,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA src buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx] = offset; ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff); } else if (((info & 0x60000000) >> 29) != 2) { - DRM_ERROR("bad CP DMA SRC_SEL\n"); + dev_warn_once(p->dev, "bad CP DMA SRC_SEL\n"); return -EINVAL; } } @@ -2175,19 +2175,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* dst address space is register */ /* GDS is ok */ if (((info & 0x00300000) >> 20) != 1) { - DRM_ERROR("CP DMA DAS not supported\n"); + dev_warn_once(p->dev, "CP DMA DAS not supported\n"); return -EINVAL; } } else { /* dst address space is memory */ if (command & PACKET3_CP_DMA_CMD_DAIC) { - DRM_ERROR("CP DMA DAIC only supported for registers\n"); + dev_warn_once(p->dev, "CP DMA DAIC only supported for registers\n"); return -EINVAL; } if (((info & 0x00300000) >> 20) == 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad CP DMA DST\n"); + dev_warn_once(p->dev, "bad CP DMA DST\n"); return -EINVAL; } @@ -2197,15 +2197,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx+2] = offset; ib[idx+3] = upper_32_bits(offset) & 0xff; } else { - DRM_ERROR("bad CP DMA DST_SEL\n"); + dev_warn_once(p->dev, "bad CP DMA DST_SEL\n"); return -EINVAL; } } @@ -2213,13 +2213,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } case PACKET3_PFP_SYNC_ME: if (pkt->count) { - DRM_ERROR("bad PFP_SYNC_ME\n"); + dev_warn_once(p->dev, "bad PFP_SYNC_ME\n"); return -EINVAL; } break; case PACKET3_SURFACE_SYNC: if (pkt->count != 3) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } /* 0xffffffff/0x0 is flush all cache flag */ @@ -2227,7 +2227,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, radeon_get_ib_value(p, idx + 2) != 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -2235,7 +2235,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_EVENT_WRITE: if (pkt->count != 2 && pkt->count != 0) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } if (pkt->count) { @@ -2243,7 +2243,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } 
offset = reloc->gpu_offset + @@ -2259,12 +2259,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 4) { - DRM_ERROR("bad EVENT_WRITE_EOP\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOP\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE_EOP\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOP\n"); return -EINVAL; } @@ -2281,12 +2281,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 3) { - DRM_ERROR("bad EVENT_WRITE_EOS\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOS\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE_EOS\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOS\n"); return -EINVAL; } @@ -2304,7 +2304,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONFIG_REG_START) || (start_reg >= PACKET3_SET_CONFIG_REG_END) || (end_reg >= PACKET3_SET_CONFIG_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONFIG_REG\n"); return -EINVAL; } for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) { @@ -2321,7 +2321,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONTEXT_REG_START) || (start_reg >= PACKET3_SET_CONTEXT_REG_END) || (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONTEXT_REG\n"); return -EINVAL; } for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) { @@ -2334,7 +2334,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_SET_RESOURCE: if (pkt->count % 8) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START; @@ -2342,7 +2342,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_RESOURCE_START) || (start_reg >= PACKET3_SET_RESOURCE_END) || (end_reg >= PACKET3_SET_RESOURCE_END)) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } for (i = 0; i < (pkt->count / 8); i++) { @@ -2355,7 +2355,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* tex base */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (tex)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (tex)\n"); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { @@ -2392,7 +2392,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } else { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (tex)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (tex)\n"); return -EINVAL; } moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -2411,14 +2411,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* vtx base */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (vtx)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (vtx)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1+(i*8)+0); size = radeon_get_ib_value(p, idx+1+(i*8)+1); if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { /* force size to size of the buffer */ - dev_warn_ratelimited(p->dev, "vbo resource seems too big for the bo\n"); + dev_warn_once(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n", 
+ size + offset, radeon_bo_size(reloc->robj)); ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; } @@ -2431,7 +2432,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case SQ_TEX_VTX_INVALID_TEXTURE: case SQ_TEX_VTX_INVALID_BUFFER: default: - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } } @@ -2445,7 +2446,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_BOOL_CONST_START) || (start_reg >= PACKET3_SET_BOOL_CONST_END) || (end_reg >= PACKET3_SET_BOOL_CONST_END)) { - DRM_ERROR("bad SET_BOOL_CONST\n"); + dev_warn_once(p->dev, "bad SET_BOOL_CONST\n"); return -EINVAL; } break; @@ -2455,7 +2456,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_LOOP_CONST_START) || (start_reg >= PACKET3_SET_LOOP_CONST_END) || (end_reg >= PACKET3_SET_LOOP_CONST_END)) { - DRM_ERROR("bad SET_LOOP_CONST\n"); + dev_warn_once(p->dev, "bad SET_LOOP_CONST\n"); return -EINVAL; } break; @@ -2465,13 +2466,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CTL_CONST_START) || (start_reg >= PACKET3_SET_CTL_CONST_END) || (end_reg >= PACKET3_SET_CTL_CONST_END)) { - DRM_ERROR("bad SET_CTL_CONST\n"); + dev_warn_once(p->dev, "bad SET_CTL_CONST\n"); return -EINVAL; } break; case PACKET3_SET_SAMPLER: if (pkt->count % 3) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START; @@ -2479,13 +2480,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_SAMPLER_START) || (start_reg >= PACKET3_SET_SAMPLER_END) || (end_reg >= PACKET3_SET_SAMPLER_END)) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } break; case PACKET3_STRMOUT_BUFFER_UPDATE: if (pkt->count != 4) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); return -EINVAL; } /* Updating memory at DST_ADDRESS. 
*/ @@ -2493,14 +2494,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2512,14 +2513,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2532,23 +2533,23 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; if (pkt->count != 3) { - DRM_ERROR("bad MEM_WRITE (invalid count)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (invalid count)\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (missing reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+0); offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; if (offset & 0x7) { - DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (address not qwords aligned)\n"); return -EINVAL; } if ((offset + 8) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", - offset + 8, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2558,7 +2559,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } case PACKET3_COPY_DW: if (pkt->count != 4) { - DRM_ERROR("bad COPY_DW (invalid count)\n"); + dev_warn_once(p->dev, "bad COPY_DW (invalid count)\n"); return -EINVAL; } if (idx_value & 0x1) { @@ -2566,14 +2567,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* SRC is memory. 
*/ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COPY_DW (missing src reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2583,8 +2584,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* SRC is a reg. */ reg = radeon_get_ib_value(p, idx+1) << 2; if (!evergreen_is_safe_reg(p, reg)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", - reg, idx + 1); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 1); return -EINVAL; } } @@ -2593,14 +2594,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* DST is memory. */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2610,8 +2611,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* DST is a reg. 
*/ reg = radeon_get_ib_value(p, idx+3) << 2; if (!evergreen_is_safe_reg(p, reg)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", - reg, idx + 3); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 3); return -EINVAL; } } @@ -2622,7 +2623,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint32_t allowed_reg_base; uint32_t source_sel; if (pkt->count != 2) { - DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n"); + dev_warn_once(p->dev, "bad SET_APPEND_CNT (invalid count)\n"); return -EINVAL; } @@ -2632,8 +2633,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, areg = idx_value >> 16; if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) { - dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n", - areg, idx); + dev_warn_once(p->dev, "forbidden register for append cnt 0x%08x at %d\n", + areg, idx); return -EINVAL; } @@ -2643,7 +2644,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint32_t swap; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n"); + dev_warn_once(p->dev, "bad SET_APPEND_CNT (missing reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx + 1); @@ -2656,7 +2657,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+1] = (offset & 0xfffffffc) | swap; ib[idx+2] = upper_32_bits(offset) & 0xff; } else { - DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n"); + dev_warn_once(p->dev, "bad SET_APPEND_CNT (unsupported operation)\n"); return -EINVAL; } break; @@ -2666,23 +2667,23 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; if (pkt->count != 2) { - DRM_ERROR("bad COND_EXEC (invalid count)\n"); + dev_warn_once(p->dev, "bad COND_EXEC (invalid count)\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COND_EXEC (missing reloc)\n"); + dev_warn_once(p->dev, "bad COND_EXEC (missing reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx + 0); offset += ((u64)(radeon_get_ib_value(p, idx + 1) & 0xff)) << 32UL; if (offset & 0x7) { - DRM_ERROR("bad COND_EXEC (address not qwords aligned)\n"); + dev_warn_once(p->dev, "bad COND_EXEC (address not qwords aligned)\n"); return -EINVAL; } if ((offset + 8) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COND_EXEC bo too small: 0x%llx, 0x%lx\n", - offset + 8, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COND_EXEC bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2692,7 +2693,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } case PACKET3_COND_WRITE: if (pkt->count != 7) { - DRM_ERROR("bad COND_WRITE (invalid count)\n"); + dev_warn_once(p->dev, "bad COND_WRITE (invalid count)\n"); return -EINVAL; } if (idx_value & 0x10) { @@ -2700,14 +2701,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* POLL is memory. 
*/ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COND_WRITE (missing src reloc)\n"); + dev_warn_once(p->dev, "bad COND_WRITE (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx + 1); offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32; if ((offset + 8) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COND_WRITE src bo too small: 0x%llx, 0x%lx\n", - offset + 8, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COND_WRITE src bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2717,8 +2718,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* POLL is a reg. */ reg = radeon_get_ib_value(p, idx + 1) << 2; if (!evergreen_is_safe_reg(p, reg)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", - reg, idx + 1); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 1); return -EINVAL; } } @@ -2727,14 +2728,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* WRITE is memory. */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COND_WRITE (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad COND_WRITE (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx + 5); offset += ((u64)(radeon_get_ib_value(p, idx + 6) & 0xff)) << 32; if ((offset + 8) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COND_WRITE dst bo too small: 0x%llx, 0x%lx\n", - offset + 8, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COND_WRITE dst bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2744,8 +2745,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* WRITE is a reg. 
*/ reg = radeon_get_ib_value(p, idx + 5) << 2; if (!evergreen_is_safe_reg(p, reg)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", - reg, idx + 5); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 5); return -EINVAL; } } @@ -2753,7 +2754,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -2853,7 +2854,7 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) r = evergreen_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", pkt.type); kfree(p->track); p->track = NULL; return -EINVAL; @@ -2896,8 +2897,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) do { if (p->idx >= ib_chunk->length_dw) { - DRM_ERROR("Can not parse packet at %d after CS end %d !\n", - p->idx, ib_chunk->length_dw); + dev_warn_once(p->dev, "Can not parse packet at %d after CS end %d !\n", + p->idx, ib_chunk->length_dw); return -EINVAL; } idx = p->idx; @@ -2910,7 +2911,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case DMA_PACKET_WRITE: r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_WRITE\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE\n"); return -EINVAL; } switch (sub_cmd) { @@ -2932,24 +2933,24 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) p->idx += count + 3; break; default: - DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n", - dst_offset, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA write buffer too small (%llu %lu)\n", + dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; case DMA_PACKET_COPY: r = r600_dma_cs_next_reloc(p, &src_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } switch (sub_cmd) { @@ -2961,13 +2962,13 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); @@ -3001,13 +3002,13 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset 
+ (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } p->idx += 9; @@ -3020,13 +3021,13 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n", - src_offset + count, radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n", + src_offset + count, radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n", - dst_offset + count, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n", + dst_offset + count, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff); @@ -3039,7 +3040,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x41: /* L2L, partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2L Partial is cayman only !\n"); return -EINVAL; } ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff); @@ -3054,7 +3055,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* L2L, dw, broadcast */ r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2L, dw, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -3064,18 +3065,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+3); src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), 
radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); @@ -3089,12 +3090,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* Copy L2T Frame to Field */ case 0x48: if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -3104,18 +3105,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); @@ -3128,7 +3129,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x49: /* L2T, T2L partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2T, T2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2T, T2L Partial is cayman only !\n"); return -EINVAL; } /* detile bit */ @@ -3151,12 +3152,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4b: /* L2T, broadcast */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -3166,18 +3167,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", 
- dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); @@ -3212,13 +3213,13 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } p->idx += 9; @@ -3227,7 +3228,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4d: /* T2T partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2T, T2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2T, T2L Partial is cayman only !\n"); return -EINVAL; } ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); @@ -3238,12 +3239,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4f: /* L2T, broadcast */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -3253,18 +3254,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", + 
dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); @@ -3274,21 +3275,21 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) p->idx += 10; break; default: - DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); return -EINVAL; } break; case DMA_PACKET_CONSTANT_FILL: r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_CONSTANT_FILL\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", - dst_offset, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", + dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); @@ -3299,7 +3300,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) p->idx += 1; break; default: - DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + dev_warn_once(p->dev, "Unknown packet type %d at %d !\n", cmd, idx); return -EINVAL; } } while (p->idx < p->chunk_ib->length_dw); @@ -3430,7 +3431,7 @@ static bool evergreen_vm_reg_valid(u32 reg) case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS: return true; default: - DRM_ERROR("Invalid register 0x%x in CS\n", reg); + DRM_DEBUG("Invalid register 0x%x in CS\n", reg); return false; } } @@ -3448,7 +3449,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, break; case PACKET3_SET_BASE: if (idx_value != 1) { - DRM_ERROR("bad SET_BASE"); + dev_warn_once(rdev->dev, "bad SET_BASE"); return -EINVAL; } break; @@ -3519,7 +3520,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if ((start_reg < PACKET3_SET_CONFIG_REG_START) || (start_reg >= PACKET3_SET_CONFIG_REG_END) || (end_reg >= PACKET3_SET_CONFIG_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + dev_warn_once(rdev->dev, "bad PACKET3_SET_CONFIG_REG\n"); return -EINVAL; } for (i = 0; i < pkt->count; i++) { @@ -3539,7 +3540,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ /* non mem to mem copies requires dw aligned count */ if ((command & 0x1fffff) % 4) { - DRM_ERROR("CP DMA command requires dw count alignment\n"); + dev_warn_once(rdev->dev, "CP DMA command requires dw count alignment\n"); return -EINVAL; } } @@ -3550,14 +3551,14 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if (command & PACKET3_CP_DMA_CMD_SAIC) { reg = start_reg; if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad SRC register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad SRC register\n"); return -EINVAL; } } else { for (i = 0; i < (command & 0x1fffff); i++) { reg = start_reg + (4 * i); if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad SRC register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad SRC register\n"); return -EINVAL; } } @@ -3571,14 +3572,14 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if (command & PACKET3_CP_DMA_CMD_DAIC) { reg = start_reg; if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad DST register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad DST register\n"); return -EINVAL; } } else { for (i = 0; 
i < (command & 0x1fffff); i++) { reg = start_reg + (4 * i); if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad DST register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad DST register\n"); return -EINVAL; } } @@ -3591,7 +3592,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, uint32_t allowed_reg_base; if (pkt->count != 2) { - DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n"); + dev_warn_once(rdev->dev, "bad SET_APPEND_CNT (invalid count)\n"); return -EINVAL; } @@ -3601,8 +3602,8 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, areg = idx_value >> 16; if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) { - DRM_ERROR("forbidden register for append cnt 0x%08x at %d\n", - areg, idx); + dev_warn_once(rdev->dev, "forbidden register for append cnt 0x%08x at %d\n", + areg, idx); return -EINVAL; } break; @@ -3681,7 +3682,9 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += count + 3; break; default: - DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]); + dev_warn_once(rdev->dev, + "bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", + idx, ib->ptr[idx]); return -EINVAL; } break; @@ -3732,7 +3735,9 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += 10; break; default: - DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]); + dev_warn_once(rdev->dev, + "bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", + idx, ib->ptr[idx]); return -EINVAL; } break; @@ -3743,7 +3748,7 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += 1; break; default: - DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + dev_warn_once(rdev->dev, "Unknown packet type %d at %d !\n", cmd, idx); return -EINVAL; } } while (idx < ib->length_dw); diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index e08559c44a5c..82edbfb259bf 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -3397,7 +3397,7 @@ static int ni_enable_smc_cac(struct radeon_device *rdev, if (PPSMC_Result_OK != smc_result) ret = -EINVAL; - ni_pi->cac_enabled = (PPSMC_Result_OK == smc_result) ? 
true : false; + ni_pi->cac_enabled = PPSMC_Result_OK == smc_result; } } else if (ni_pi->cac_enabled) { smc_result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_DisableCac); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 80703417d8a1..07a9c523a17a 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1298,8 +1298,8 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1313,7 +1313,7 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p, tile_flags |= RADEON_DST_TILE_MACRO; if (reloc->tiling_flags & RADEON_TILING_MICRO) { if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); + dev_warn_once(p->dev, "Cannot src blit from microtiled surface\n"); radeon_cs_dump_packet(p, pkt); return -EINVAL; } @@ -1342,8 +1342,8 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, track = (struct r100_cs_track *)p->track; c = radeon_get_ib_value(p, idx++) & 0x1F; if (c > 16) { - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", - pkt->opcode); + dev_warn_once(p->dev, "Only 16 vertex buffers are allowed %d\n", + pkt->opcode); radeon_cs_dump_packet(p, pkt); return -EINVAL; } @@ -1351,8 +1351,8 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, for (i = 0; i < (c - 1); i += 2, idx += 3) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", + pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1364,8 +1364,8 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, track->arrays[i + 0].esize &= 0x7F; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", + pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1377,8 +1377,8 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, if (c & 1) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", + pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1470,12 +1470,12 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) /* check its a wait until and only 1 count */ if (waitreloc.reg != RADEON_WAIT_UNTIL || waitreloc.count != 0) { - DRM_ERROR("vline wait had illegal wait until segment\n"); + dev_warn_once(p->dev, "vline wait had illegal wait until segment\n"); return -EINVAL; } if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) { - DRM_ERROR("vline wait had illegal wait until\n"); + dev_warn_once(p->dev, "vline wait had illegal wait until\n"); return -EINVAL; } @@ -1493,7 +1493,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) reg = R100_CP_PACKET0_GET_REG(header); crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { - DRM_ERROR("cannot find crtc %d\n", crtc_id); + dev_warn_once(p->dev, "cannot find crtc %d\n", crtc_id); return -ENOENT; } radeon_crtc = to_radeon_crtc(crtc); @@ -1514,7 +1514,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2; break; default: - DRM_ERROR("unknown crtc reloc\n"); + dev_warn_once(p->dev, "unknown crtc 
reloc\n"); return -EINVAL; } ib[h_idx] = header; @@ -1599,7 +1599,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_CRTC_GUI_TRIG_VLINE: r = r100_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", idx, reg); radeon_cs_dump_packet(p, pkt); return r; @@ -1616,8 +1616,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_DEPTHOFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1629,8 +1629,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_COLOROFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1645,8 +1645,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, i = (reg - RADEON_PP_TXOFFSET_0) / 24; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1672,8 +1672,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1690,8 +1690,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1708,8 +1708,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1726,8 +1726,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_COLORPITCH: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1768,8 +1768,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->cb[0].cpp = 4; break; default: - DRM_ERROR("Invalid color buffer format (%d) !\n", - ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); + dev_warn_once(p->dev, "Invalid color buffer format (%d) !\n", + ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); return -EINVAL; } track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); @@ -1797,8 +1797,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_ZPASS_ADDR: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1927,10 +1927,10 @@ int 
r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p, idx = pkt->idx + 1; value = radeon_get_ib_value(p, idx + 2); if ((value + 1) > radeon_bo_size(robj)) { - DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER " - "(need %u have %lu) !\n", - value + 1, - radeon_bo_size(robj)); + dev_warn_once(p->dev, "[drm] Buffer too small for PACKET3 INDX_BUFFER " + "(need %u have %lu) !\n", + value + 1, + radeon_bo_size(robj)); return -EINVAL; } return 0; @@ -1957,7 +1957,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, case PACKET3_INDX_BUFFER: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1971,7 +1971,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1992,7 +1992,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_3D_DRAW_IMMD: if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) { - DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + dev_warn_once(p->dev, "PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0)); @@ -2005,7 +2005,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, /* triggers drawing using in-packet vertex data */ case PACKET3_3D_DRAW_IMMD_2: if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) { - DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + dev_warn_once(p->dev, "PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } track->vap_vf_cntl = radeon_get_ib_value(p, idx); @@ -2051,7 +2051,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -2093,8 +2093,8 @@ int r100_cs_parse(struct radeon_cs_parser *p) r = r100_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", - pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", + pkt.type); return -EINVAL; } if (r) @@ -2105,19 +2105,19 @@ int r100_cs_parse(struct radeon_cs_parser *p) static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) { - DRM_ERROR("pitch %d\n", t->pitch); - DRM_ERROR("use_pitch %d\n", t->use_pitch); - DRM_ERROR("width %d\n", t->width); - DRM_ERROR("width_11 %d\n", t->width_11); - DRM_ERROR("height %d\n", t->height); - DRM_ERROR("height_11 %d\n", t->height_11); - DRM_ERROR("num levels %d\n", t->num_levels); - DRM_ERROR("depth %d\n", t->txdepth); - DRM_ERROR("bpp %d\n", t->cpp); - DRM_ERROR("coordinate type %d\n", t->tex_coord_type); - DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); - DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); - DRM_ERROR("compress format %d\n", t->compress_format); + DRM_DEBUG("pitch %d\n", t->pitch); + DRM_DEBUG("use_pitch %d\n", t->use_pitch); + DRM_DEBUG("width %d\n", t->width); + DRM_DEBUG("width_11 %d\n", t->width_11); + DRM_DEBUG("height %d\n", t->height); + DRM_DEBUG("height_11 %d\n", t->height_11); + DRM_DEBUG("num levels %d\n", t->num_levels); + DRM_DEBUG("depth %d\n", t->txdepth); + DRM_DEBUG("bpp %d\n", 
t->cpp); + DRM_DEBUG("coordinate type %d\n", t->tex_coord_type); + DRM_DEBUG("width round to power of 2 %d\n", t->roundup_w); + DRM_DEBUG("height round to power of 2 %d\n", t->roundup_h); + DRM_DEBUG("compress format %d\n", t->compress_format); } static int r100_track_compress_size(int compress_format, int w, int h) @@ -2172,8 +2172,9 @@ static int r100_cs_track_cube(struct radeon_device *rdev, size += track->textures[idx].cube_info[face].offset; if (size > radeon_bo_size(cube_robj)) { - DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", - size, radeon_bo_size(cube_robj)); + dev_warn_once(rdev->dev, + "Cube texture offset greater than object size %lu %lu\n", + size, radeon_bo_size(cube_robj)); r100_cs_track_texture_print(&track->textures[idx]); return -1; } @@ -2196,7 +2197,7 @@ static int r100_cs_track_texture_check(struct radeon_device *rdev, continue; robj = track->textures[u].robj; if (robj == NULL) { - DRM_ERROR("No texture bound to unit %u\n", u); + dev_warn_once(rdev->dev, "No texture bound to unit %u\n", u); return -EINVAL; } size = 0; @@ -2249,13 +2250,13 @@ static int r100_cs_track_texture_check(struct radeon_device *rdev, size *= 6; break; default: - DRM_ERROR("Invalid texture coordinate type %u for unit " - "%u\n", track->textures[u].tex_coord_type, u); + dev_warn_once(rdev->dev, "Invalid texture coordinate type %u for unit " + "%u\n", track->textures[u].tex_coord_type, u); return -EINVAL; } if (size > radeon_bo_size(robj)) { - DRM_ERROR("Texture of unit %u needs %lu bytes but is " - "%lu\n", u, size, radeon_bo_size(robj)); + dev_warn_once(rdev->dev, "Texture of unit %u needs %lu bytes but is " + "%lu\n", u, size, radeon_bo_size(robj)); r100_cs_track_texture_print(&track->textures[u]); return -EINVAL; } @@ -2277,18 +2278,18 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) for (i = 0; i < num_cb; i++) { if (track->cb[i].robj == NULL) { - DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); + dev_warn_once(rdev->dev, "[drm] No buffer for color buffer %d !\n", i); return -EINVAL; } size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; size += track->cb[i].offset; if (size > radeon_bo_size(track->cb[i].robj)) { - DRM_ERROR("[drm] Buffer too small for color buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->cb[i].robj)); - DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", - i, track->cb[i].pitch, track->cb[i].cpp, - track->cb[i].offset, track->maxy); + dev_warn_once(rdev->dev, "[drm] Buffer too small for color buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->cb[i].robj)); + dev_warn_once(rdev->dev, "[drm] color buffer %d (%u %u %u %u)\n", + i, track->cb[i].pitch, track->cb[i].cpp, + track->cb[i].offset, track->maxy); return -EINVAL; } } @@ -2296,18 +2297,18 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) if (track->zb_dirty && track->z_enabled) { if (track->zb.robj == NULL) { - DRM_ERROR("[drm] No buffer for z buffer !\n"); + dev_warn_once(rdev->dev, "[drm] No buffer for z buffer !\n"); return -EINVAL; } size = track->zb.pitch * track->zb.cpp * track->maxy; size += track->zb.offset; if (size > radeon_bo_size(track->zb.robj)) { - DRM_ERROR("[drm] Buffer too small for z buffer " - "(need %lu have %lu) !\n", size, - radeon_bo_size(track->zb.robj)); - DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", - track->zb.pitch, track->zb.cpp, - track->zb.offset, track->maxy); + dev_warn_once(rdev->dev, "[drm] Buffer too small for z buffer " + "(need %lu have 
%lu) !\n", size, + radeon_bo_size(track->zb.robj)); + dev_warn_once(rdev->dev, "[drm] zbuffer (%u %u %u %u)\n", + track->zb.pitch, track->zb.cpp, + track->zb.offset, track->maxy); return -EINVAL; } } @@ -2315,19 +2316,19 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) if (track->aa_dirty && track->aaresolve) { if (track->aa.robj == NULL) { - DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); + dev_warn_once(rdev->dev, "[drm] No buffer for AA resolve buffer %d !\n", i); return -EINVAL; } /* I believe the format comes from colorbuffer0. */ size = track->aa.pitch * track->cb[0].cpp * track->maxy; size += track->aa.offset; if (size > radeon_bo_size(track->aa.robj)) { - DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->aa.robj)); - DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", - i, track->aa.pitch, track->cb[0].cpp, - track->aa.offset, track->maxy); + dev_warn_once(rdev->dev, "[drm] Buffer too small for AA resolve buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->aa.robj)); + dev_warn_once(rdev->dev, "[drm] AA resolve buffer %d (%u %u %u %u)\n", + i, track->aa.pitch, track->cb[0].cpp, + track->aa.offset, track->maxy); return -EINVAL; } } @@ -2344,17 +2345,17 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) for (i = 0; i < track->num_arrays; i++) { size = track->arrays[i].esize * track->max_indx * 4UL; if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); + dev_warn_once(rdev->dev, "(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); return -EINVAL; } if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - DRM_ERROR("Max indices %u\n", track->max_indx); + dev_warn_once(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + dev_warn_once(rdev->dev, "Max indices %u\n", track->max_indx); return -EINVAL; } } @@ -2363,16 +2364,16 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) for (i = 0; i < track->num_arrays; i++) { size = track->arrays[i].esize * (nverts - 1) * 4UL; if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); + dev_warn_once(rdev->dev, "(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); return -EINVAL; } if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); + dev_warn_once(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); return -EINVAL; } } @@ -2380,16 +2381,16 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) case 3: size = track->vtx_size * nverts; if (size != track->immd_dwords) { - DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", - track->immd_dwords, size); - DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", - nverts, track->vtx_size); + dev_warn_once(rdev->dev, "IMMD draw %u dwors but needs %lu dwords\n", + track->immd_dwords, size); + 
dev_warn_once(rdev->dev, "VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", + nverts, track->vtx_size); return -EINVAL; } break; default: - DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", - prim_walk); + dev_warn_once(rdev->dev, "[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", + prim_walk); return -EINVAL; } diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index f5f2ffea5ab2..10a65a71de31 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -163,8 +163,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_CRTC_GUI_TRIG_VLINE: r = r100_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -180,8 +180,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_DEPTHOFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -193,8 +193,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_COLOROFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -212,8 +212,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, i = (reg - R200_PP_TXOFFSET_0) / 24; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -265,8 +265,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, face = (reg - ((i * 24) + R200_PP_TXOFFSET_0)) / 4; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -283,8 +283,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_COLORPITCH: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -326,12 +326,12 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->cb[0].cpp = 4; break; default: - DRM_ERROR("Invalid color buffer format (%d) !\n", - ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); + dev_warn_once(p->dev, "Invalid color buffer format (%d) !\n", + ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); return -EINVAL; } if (idx_value & RADEON_DEPTHXY_OFFSET_ENABLE) { - DRM_ERROR("No support for depth xy offset in kms\n"); + dev_warn_once(p->dev, "No support for depth xy offset in kms\n"); return -EINVAL; } @@ -360,8 +360,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_ZPASS_ADDR: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index d22889fbfa9c..d2ee6deec039 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ 
-645,8 +645,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case RADEON_CRTC_GUI_TRIG_VLINE: r = r100_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -664,8 +664,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, i = (reg - R300_RB3D_COLOROFFSET0) >> 2; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -677,8 +677,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case R300_ZB_DEPTHOFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -706,8 +706,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, i = (reg - R300_TX_OFFSET_0) >> 2; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -762,7 +762,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, /* RB3D_CCTL */ if ((idx_value & (1 << 10)) && /* CMASK_ENABLE */ p->rdev->cmask_filp != p->filp) { - DRM_ERROR("Invalid RB3D_CCTL: Cannot enable CMASK.\n"); + dev_warn_once(p->dev, "Invalid RB3D_CCTL: Cannot enable CMASK.\n"); return -EINVAL; } track->num_cb = ((idx_value >> 5) & 0x3) + 1; @@ -779,8 +779,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -812,8 +812,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case 5: if (p->rdev->family < CHIP_RV515) { - DRM_ERROR("Invalid color buffer format (%d)!\n", - ((idx_value >> 21) & 0xF)); + dev_warn_once(p->dev, "Invalid color buffer format (%d)!\n", + ((idx_value >> 21) & 0xF)); return -EINVAL; } fallthrough; @@ -827,8 +827,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->cb[i].cpp = 16; break; default: - DRM_ERROR("Invalid color buffer format (%d) !\n", - ((idx_value >> 21) & 0xF)); + dev_warn_once(p->dev, "Invalid color buffer format (%d) !\n", + ((idx_value >> 21) & 0xF)); return -EINVAL; } track->cb_dirty = true; @@ -853,8 +853,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->zb.cpp = 4; break; default: - DRM_ERROR("Invalid z buffer format (%d) !\n", - (idx_value & 0xF)); + dev_warn_once(p->dev, "Invalid z buffer format (%d) !\n", + (idx_value & 0xF)); return -EINVAL; } track->zb_dirty = true; @@ -864,8 +864,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -962,8 +962,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case R300_TX_FORMAT_ATI2N: if (p->rdev->family < CHIP_R420) { - 
DRM_ERROR("Invalid texture format %u\n", - (idx_value & 0x1F)); + dev_warn_once(p->dev, "Invalid texture format %u\n", + (idx_value & 0x1F)); return -EINVAL; } /* The same rules apply as for DXT3/5. */ @@ -974,8 +974,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->textures[i].compress_format = R100_TRACK_COMP_DXT35; break; default: - DRM_ERROR("Invalid texture format %u\n", - (idx_value & 0x1F)); + dev_warn_once(p->dev, "Invalid texture format %u\n", + (idx_value & 0x1F)); return -EINVAL; } track->tex_dirty = true; @@ -1041,7 +1041,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, R100_TRACK_COMP_DXT1; } } else if (idx_value & (1 << 14)) { - DRM_ERROR("Forbidden bit TXFORMAT_MSB\n"); + dev_warn_once(p->dev, "Forbidden bit TXFORMAT_MSB\n"); return -EINVAL; } track->tex_dirty = true; @@ -1079,8 +1079,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case R300_ZB_ZPASS_ADDR: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1121,8 +1121,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case R300_RB3D_AARESOLVE_OFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1191,7 +1191,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, case PACKET3_INDX_BUFFER: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1207,7 +1207,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, * PRIM_WALK must be equal to 3 vertex data in embedded * in cmd stream */ if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) { - DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + dev_warn_once(p->dev, "PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); @@ -1222,7 +1222,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, * PRIM_WALK must be equal to 3 vertex data in embedded * in cmd stream */ if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) { - DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + dev_warn_once(p->dev, "PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } track->vap_vf_cntl = radeon_get_ib_value(p, idx); @@ -1272,7 +1272,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -1308,7 +1308,7 @@ int r300_cs_parse(struct radeon_cs_parser *p) r = r300_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", pkt.type); return -EINVAL; } if (r) { diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index ac77d1246b94..8eeceeeca362 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -361,9 +361,9 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) format = G_0280A0_FORMAT(track->cb_color_info[i]); if (!r600_fmt_is_valid_color(format)) { - 
dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n", - __func__, __LINE__, format, - i, track->cb_color_info[i]); + dev_warn_once(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n", + __func__, __LINE__, format, + i, track->cb_color_info[i]); return -EINVAL; } /* pitch in pixels */ @@ -384,9 +384,9 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) array_check.blocksize = r600_fmt_get_blocksize(format); if (r600_get_array_mode_alignment(&array_check, &pitch_align, &height_align, &depth_align, &base_align)) { - dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, - G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, - track->cb_color_info[i]); + dev_warn_once(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, + G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, + track->cb_color_info[i]); return -EINVAL; } switch (array_mode) { @@ -402,25 +402,26 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) case V_0280A0_ARRAY_2D_TILED_THIN1: break; default: - dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, - G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, - track->cb_color_info[i]); + dev_warn_once(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, + G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, + track->cb_color_info[i]); return -EINVAL; } if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d cb pitch (%d, 0x%x, %d) invalid\n", - __func__, __LINE__, pitch, pitch_align, array_mode); + dev_warn_once(p->dev, "%s:%d cb pitch (%d, 0x%x, %d) invalid\n", + __func__, __LINE__, pitch, pitch_align, array_mode); return -EINVAL; } if (!IS_ALIGNED(height, height_align)) { - dev_warn(p->dev, "%s:%d cb height (%d, 0x%x, %d) invalid\n", - __func__, __LINE__, height, height_align, array_mode); + dev_warn_once(p->dev, "%s:%d cb height (%d, 0x%x, %d) invalid\n", + __func__, __LINE__, height, height_align, array_mode); return -EINVAL; } if (!IS_ALIGNED(base_offset, base_align)) { - dev_warn(p->dev, "%s offset[%d] 0x%llx 0x%llx, %d not aligned\n", __func__, i, - base_offset, base_align, array_mode); + dev_warn_once(p->dev, + "%s offset[%d] 0x%llx 0x%llx, %d not aligned\n", __func__, i, + base_offset, base_align, array_mode); return -EINVAL; } @@ -447,13 +448,14 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) * broken userspace. 
*/ } else { - dev_warn(p->dev, "%s offset[%d] %d %llu %d %lu too big (%d %d) (%d %d %d)\n", - __func__, i, array_mode, - track->cb_color_bo_offset[i], tmp, - radeon_bo_size(track->cb_color_bo[i]), - pitch, height, r600_fmt_get_nblocksx(format, pitch), - r600_fmt_get_nblocksy(format, height), - r600_fmt_get_blocksize(format)); + dev_warn_once(p->dev, + "%s offset[%d] %d %llu %d %lu too big (%d %d) (%d %d %d)\n", + __func__, i, array_mode, + track->cb_color_bo_offset[i], tmp, + radeon_bo_size(track->cb_color_bo[i]), + pitch, height, r600_fmt_get_nblocksx(format, pitch), + r600_fmt_get_nblocksy(format, height), + r600_fmt_get_blocksize(format)); return -EINVAL; } } @@ -478,11 +480,11 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) if (bytes + track->cb_color_frag_offset[i] > radeon_bo_size(track->cb_color_frag_bo[i])) { - dev_warn(p->dev, "%s FMASK_TILE_MAX too large " - "(tile_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", - __func__, tile_max, bytes, - track->cb_color_frag_offset[i], - radeon_bo_size(track->cb_color_frag_bo[i])); + dev_warn_once(p->dev, "%s FMASK_TILE_MAX too large " + "(tile_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", + __func__, tile_max, bytes, + track->cb_color_frag_offset[i], + radeon_bo_size(track->cb_color_frag_bo[i])); return -EINVAL; } } @@ -496,17 +498,17 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) if (bytes + track->cb_color_tile_offset[i] > radeon_bo_size(track->cb_color_tile_bo[i])) { - dev_warn(p->dev, "%s CMASK_BLOCK_MAX too large " - "(block_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", - __func__, block_max, bytes, - track->cb_color_tile_offset[i], - radeon_bo_size(track->cb_color_tile_bo[i])); + dev_warn_once(p->dev, "%s CMASK_BLOCK_MAX too large " + "(block_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", + __func__, block_max, bytes, + track->cb_color_tile_offset[i], + radeon_bo_size(track->cb_color_tile_bo[i])); return -EINVAL; } break; } default: - dev_warn(p->dev, "%s invalid tile mode\n", __func__); + dev_warn_once(p->dev, "%s invalid tile mode\n", __func__); return -EINVAL; } return 0; @@ -526,7 +528,7 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) if (track->db_bo == NULL) { - dev_warn(p->dev, "z/stencil with no depth buffer\n"); + dev_warn_once(p->dev, "z/stencil with no depth buffer\n"); return -EINVAL; } switch (G_028010_FORMAT(track->db_depth_info)) { @@ -544,20 +546,22 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) bpe = 8; break; default: - dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info)); + dev_warn_once(p->dev, + "z/stencil with invalid format %d\n", + G_028010_FORMAT(track->db_depth_info)); return -EINVAL; } if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { if (!track->db_depth_size_idx) { - dev_warn(p->dev, "z/stencil buffer size not set\n"); + dev_warn_once(p->dev, "z/stencil buffer size not set\n"); return -EINVAL; } tmp = radeon_bo_size(track->db_bo) - track->db_offset; tmp = (tmp / bpe) >> 6; if (!tmp) { - dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n", - track->db_depth_size, bpe, track->db_offset, - radeon_bo_size(track->db_bo)); + dev_warn_once(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n", + track->db_depth_size, bpe, track->db_offset, + radeon_bo_size(track->db_bo)); return -EINVAL; } ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); @@ -579,9 +583,9 @@ static int r600_cs_track_validate_db(struct 
radeon_cs_parser *p) array_check.blocksize = bpe; if (r600_get_array_mode_alignment(&array_check, &pitch_align, &height_align, &depth_align, &base_align)) { - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, - G_028010_ARRAY_MODE(track->db_depth_info), - track->db_depth_info); + dev_warn_once(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, + G_028010_ARRAY_MODE(track->db_depth_info), + track->db_depth_info); return -EINVAL; } switch (array_mode) { @@ -592,24 +596,24 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) case V_028010_ARRAY_2D_TILED_THIN1: break; default: - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, - G_028010_ARRAY_MODE(track->db_depth_info), - track->db_depth_info); + dev_warn_once(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, + G_028010_ARRAY_MODE(track->db_depth_info), + track->db_depth_info); return -EINVAL; } if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", + dev_warn_once(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", __func__, __LINE__, pitch, pitch_align, array_mode); return -EINVAL; } if (!IS_ALIGNED(height, height_align)) { - dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", + dev_warn_once(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", __func__, __LINE__, height, height_align, array_mode); return -EINVAL; } if (!IS_ALIGNED(base_offset, base_align)) { - dev_warn(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__, + dev_warn_once(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__, base_offset, base_align, array_mode); return -EINVAL; } @@ -618,10 +622,11 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; tmp = ntiles * bpe * 64 * nviews * track->nsamples; if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { - dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", - array_mode, - track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, - radeon_bo_size(track->db_bo)); + dev_warn_once(p->dev, + "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", + array_mode, + track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, + radeon_bo_size(track->db_bo)); return -EINVAL; } } @@ -632,13 +637,13 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) unsigned nbx, nby; if (track->htile_bo == NULL) { - dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", - __func__, __LINE__, track->db_depth_info); + dev_warn_once(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", + __func__, __LINE__, track->db_depth_info); return -EINVAL; } if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { - dev_warn(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 0x%08x\n", - __func__, __LINE__, track->db_depth_size); + dev_warn_once(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 0x%08x\n", + __func__, __LINE__, track->db_depth_size); return -EINVAL; } @@ -676,8 +681,8 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) nby = round_up(nby, 16 * 8); break; default: - dev_warn(p->dev, "%s:%d invalid num pipes %d\n", - __func__, __LINE__, track->npipes); + dev_warn_once(p->dev, "%s:%d invalid num pipes %d\n", + __func__, __LINE__, track->npipes); return -EINVAL; } } @@ -689,9 +694,9 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) size += track->htile_offset; if (size > 
radeon_bo_size(track->htile_bo)) { - dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", - __func__, __LINE__, radeon_bo_size(track->htile_bo), - size, nbx, nby); + dev_warn_once(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", + __func__, __LINE__, radeon_bo_size(track->htile_bo), + size, nbx, nby); return -EINVAL; } } @@ -718,13 +723,13 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) u64 offset = (u64)track->vgt_strmout_bo_offset[i] + (u64)track->vgt_strmout_size[i]; if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { - DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n", - i, offset, - radeon_bo_size(track->vgt_strmout_bo[i])); + dev_warn_once(p->dev, "streamout %d bo too small: 0x%llx, 0x%lx\n", + i, offset, + radeon_bo_size(track->vgt_strmout_bo[i])); return -EINVAL; } } else { - dev_warn(p->dev, "No buffer for streamout %d\n", i); + dev_warn_once(p->dev, "No buffer for streamout %d\n", i); return -EINVAL; } } @@ -753,8 +758,8 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) (tmp >> (i * 4)) & 0xF) { /* at least one component is enabled */ if (track->cb_color_bo[i] == NULL) { - dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", - __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); + dev_warn_once(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", + __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); return -EINVAL; } /* perform rewrite of CB_COLOR[0-7]_SIZE */ @@ -841,33 +846,33 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, /* check its a WAIT_REG_MEM */ if (wait_reg_mem.type != RADEON_PACKET_TYPE3 || wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) { - DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n"); + dev_warn_once(p->dev, "vline wait missing WAIT_REG_MEM segment\n"); return -EINVAL; } wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1); /* bit 4 is reg (0) or mem (1) */ if (wait_reg_mem_info & 0x10) { - DRM_ERROR("vline WAIT_REG_MEM waiting on MEM instead of REG\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM waiting on MEM instead of REG\n"); return -EINVAL; } /* bit 8 is me (0) or pfp (1) */ if (wait_reg_mem_info & 0x100) { - DRM_ERROR("vline WAIT_REG_MEM waiting on PFP instead of ME\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM waiting on PFP instead of ME\n"); return -EINVAL; } /* waiting for value to be equal */ if ((wait_reg_mem_info & 0x7) != 0x3) { - DRM_ERROR("vline WAIT_REG_MEM function not equal\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM function not equal\n"); return -EINVAL; } if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != vline_status[0]) { - DRM_ERROR("vline WAIT_REG_MEM bad reg\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM bad reg\n"); return -EINVAL; } if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != RADEON_VLINE_STAT) { - DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM bad bit mask\n"); return -EINVAL; } @@ -886,7 +891,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { - DRM_ERROR("cannot find crtc %d\n", crtc_id); + dev_warn_once(p->dev, "cannot find crtc %d\n", crtc_id); return -ENOENT; } radeon_crtc = to_radeon_crtc(crtc); @@ -907,7 +912,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, ib[h_idx] = header; ib[h_idx + 4] = vline_status[crtc_id] >> 2; } else { - DRM_ERROR("unknown crtc reloc\n"); + dev_warn_once(p->dev, "unknown crtc reloc\n"); 
return -EINVAL; } return 0; @@ -923,8 +928,8 @@ static int r600_packet0_check(struct radeon_cs_parser *p, case AVIVO_D1MODE_VLINE_START_END: r = r600_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); return r; } break; @@ -972,7 +977,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) i = (reg >> 7); if (i >= ARRAY_SIZE(r600_reg_safe_bm)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; } m = 1 << ((reg >> 2) & 31); @@ -1013,8 +1018,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_VSTMP_RING_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1031,8 +1036,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) radeon_cs_packet_next_is_pkt3_nop(p)) { r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_depth_info = radeon_get_ib_value(p, idx); @@ -1073,8 +1078,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case VGT_STRMOUT_BUFFER_BASE_3: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; @@ -1096,8 +1101,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CP_COHER_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "missing reloc for CP_COHER_BASE " - "0x%04X\n", reg); + dev_warn_once(p->dev, "missing reloc for CP_COHER_BASE " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1270,8 +1275,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - CB_COLOR0_BASE) / 4; @@ -1285,8 +1290,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_DEPTH_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_offset = radeon_get_ib_value(p, idx) << 8; @@ -1298,8 +1303,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_HTILE_DATA_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->htile_offset = (u64)radeon_get_ib_value(p, idx) << 8; @@ -1368,8 +1373,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_ALU_CONST_CACHE_VS_15: r = radeon_cs_packet_next_reloc(p, &reloc, 
r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1377,8 +1382,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SX_MEMORY_EXPORT_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONFIG_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1387,7 +1392,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; break; default: - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; } return 0; @@ -1408,7 +1413,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, unsigned block_align, unsigned height_align, unsigned base_align, unsigned *l0_size, unsigned *mipmap_size) { - unsigned offset, i, level; + unsigned offset, i; unsigned width, height, depth, size; unsigned blocksize; unsigned nbx, nby; @@ -1420,7 +1425,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, w0 = r600_mip_minify(w0, 0); h0 = r600_mip_minify(h0, 0); d0 = r600_mip_minify(d0, 0); - for(i = 0, offset = 0, level = blevel; i < nlevels; i++, level++) { + for (i = 0, offset = 0; i < nlevels; i++) { width = r600_mip_minify(w0, i); nbx = r600_fmt_get_nblocksx(format, width); @@ -1543,43 +1548,43 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, llevel = 0; break; default: - dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0)); + dev_warn_once(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0)); return -EINVAL; } if (!r600_fmt_is_valid_texture(format, p->family)) { - dev_warn(p->dev, "%s:%d texture invalid format %d\n", - __func__, __LINE__, format); + dev_warn_once(p->dev, "%s:%d texture invalid format %d\n", + __func__, __LINE__, format); return -EINVAL; } if (r600_get_array_mode_alignment(&array_check, &pitch_align, &height_align, &depth_align, &base_align)) { - dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n", - __func__, __LINE__, G_038000_TILE_MODE(word0)); + dev_warn_once(p->dev, "%s:%d tex array mode (%d) invalid\n", + __func__, __LINE__, G_038000_TILE_MODE(word0)); return -EINVAL; } /* XXX check height as well... 
*/ if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d tex pitch (%d, 0x%x, %d) invalid\n", - __func__, __LINE__, pitch, pitch_align, G_038000_TILE_MODE(word0)); + dev_warn_once(p->dev, "%s:%d tex pitch (%d, 0x%x, %d) invalid\n", + __func__, __LINE__, pitch, pitch_align, G_038000_TILE_MODE(word0)); return -EINVAL; } if (!IS_ALIGNED(base_offset, base_align)) { - dev_warn(p->dev, "%s:%d tex base offset (0x%llx, 0x%llx, %d) invalid\n", - __func__, __LINE__, base_offset, base_align, G_038000_TILE_MODE(word0)); + dev_warn_once(p->dev, "%s:%d tex base offset (0x%llx, 0x%llx, %d) invalid\n", + __func__, __LINE__, base_offset, base_align, G_038000_TILE_MODE(word0)); return -EINVAL; } if (!IS_ALIGNED(mip_offset, base_align)) { - dev_warn(p->dev, "%s:%d tex mip offset (0x%llx, 0x%llx, %d) invalid\n", - __func__, __LINE__, mip_offset, base_align, G_038000_TILE_MODE(word0)); + dev_warn_once(p->dev, "%s:%d tex mip offset (0x%llx, 0x%llx, %d) invalid\n", + __func__, __LINE__, mip_offset, base_align, G_038000_TILE_MODE(word0)); return -EINVAL; } if (blevel > llevel) { - dev_warn(p->dev, "texture blevel %d > llevel %d\n", - blevel, llevel); + dev_warn_once(p->dev, "texture blevel %d > llevel %d\n", + blevel, llevel); } if (is_array) { barray = G_038014_BASE_ARRAY(word5); @@ -1592,16 +1597,16 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, &l0_size, &mipmap_size); /* using get ib will give us the offset into the texture bo */ if ((l0_size + word2) > radeon_bo_size(texture)) { - dev_warn(p->dev, "texture bo too small ((%d %d) (%d %d) %d %d %d -> %d have %ld)\n", - w0, h0, pitch_align, height_align, - array_check.array_mode, format, word2, - l0_size, radeon_bo_size(texture)); - dev_warn(p->dev, "alignments %d %d %d %lld\n", pitch, pitch_align, height_align, base_align); + dev_warn_once(p->dev, "texture bo too small ((%d %d) (%d %d) %d %d %d -> %d have %ld)\n", + w0, h0, pitch_align, height_align, + array_check.array_mode, format, word2, + l0_size, radeon_bo_size(texture)); + dev_warn_once(p->dev, "alignments %d %d %d %lld\n", pitch, pitch_align, height_align, base_align); return -EINVAL; } /* using get ib will give us the offset into the mipmap bo */ if ((mipmap_size + word3) > radeon_bo_size(mipmap)) { - /*dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n", + /*dev_warn_once(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n", w0, h0, format, blevel, nlevels, word3, mipmap_size, radeon_bo_size(texture));*/ } return 0; @@ -1613,13 +1618,13 @@ static bool r600_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) i = (reg >> 7); if (i >= ARRAY_SIZE(r600_reg_safe_bm)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return false; } m = 1 << ((reg >> 2) & 31); if (!(r600_reg_safe_bm[i] & m)) return true; - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return false; } @@ -1648,7 +1653,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 1) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1660,13 +1665,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return 0; if (pred_op > 2) { - DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op); + dev_warn_once(p->dev, "bad SET PREDICATION operation %d\n", pred_op); return -EINVAL; } 
r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1681,20 +1686,20 @@ static int r600_packet3_check(struct radeon_cs_parser *p, case PACKET3_START_3D_CMDBUF: if (p->family >= CHIP_RV770 || pkt->count) { - DRM_ERROR("bad START_3D\n"); + dev_warn_once(p->dev, "bad START_3D\n"); return -EINVAL; } break; case PACKET3_CONTEXT_CONTROL: if (pkt->count != 1) { - DRM_ERROR("bad CONTEXT_CONTROL\n"); + dev_warn_once(p->dev, "bad CONTEXT_CONTROL\n"); return -EINVAL; } break; case PACKET3_INDEX_TYPE: case PACKET3_NUM_INSTANCES: if (pkt->count) { - DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n"); + dev_warn_once(p->dev, "bad INDEX_TYPE/NUM_INSTANCES\n"); return -EINVAL; } break; @@ -1702,12 +1707,12 @@ static int r600_packet3_check(struct radeon_cs_parser *p, { uint64_t offset; if (pkt->count != 3) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } @@ -1720,37 +1725,37 @@ static int r600_packet3_check(struct radeon_cs_parser *p, r = r600_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; } case PACKET3_DRAW_INDEX_AUTO: if (pkt->count != 1) { - DRM_ERROR("bad DRAW_INDEX_AUTO\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_AUTO\n"); return -EINVAL; } r = r600_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DRAW_INDEX_IMMD_BE: case PACKET3_DRAW_INDEX_IMMD: if (pkt->count < 2) { - DRM_ERROR("bad DRAW_INDEX_IMMD\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_IMMD\n"); return -EINVAL; } r = r600_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_WAIT_REG_MEM: if (pkt->count != 5) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } /* bit 4 is reg (0) or mem (1) */ @@ -1759,7 +1764,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } @@ -1770,7 +1775,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffff0); ib[idx+2] = upper_32_bits(offset) & 0xff; } else if (idx_value & 0x100) { - DRM_ERROR("cannot use PFP on REG wait\n"); + dev_warn_once(p->dev, "cannot use PFP on REG wait\n"); return -EINVAL; } break; @@ -1779,24 +1784,24 @@ static int r600_packet3_check(struct radeon_cs_parser *p, u32 command, size; u64 offset, tmp; if (pkt->count != 4) { - DRM_ERROR("bad CP DMA\n"); + dev_warn_once(p->dev, "bad CP DMA\n"); return -EINVAL; } command = radeon_get_ib_value(p, idx+4); size = command & 0x1fffff; if (command & PACKET3_CP_DMA_CMD_SAS) { /* src address space is register */ - DRM_ERROR("CP DMA SAS not supported\n"); + dev_warn_once(p->dev, "CP DMA SAS not supported\n"); return -EINVAL; } else { if (command & PACKET3_CP_DMA_CMD_SAIC) { - DRM_ERROR("CP DMA SAIC 
only supported for registers\n"); + dev_warn_once(p->dev, "CP DMA SAIC only supported for registers\n"); return -EINVAL; } /* src address space is memory */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad CP DMA SRC\n"); + dev_warn_once(p->dev, "bad CP DMA SRC\n"); return -EINVAL; } @@ -1806,8 +1811,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA src buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } @@ -1816,17 +1821,17 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } if (command & PACKET3_CP_DMA_CMD_DAS) { /* dst address space is register */ - DRM_ERROR("CP DMA DAS not supported\n"); + dev_warn_once(p->dev, "CP DMA DAS not supported\n"); return -EINVAL; } else { /* dst address space is memory */ if (command & PACKET3_CP_DMA_CMD_DAIC) { - DRM_ERROR("CP DMA DAIC only supported for registers\n"); + dev_warn_once(p->dev, "CP DMA DAIC only supported for registers\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad CP DMA DST\n"); + dev_warn_once(p->dev, "bad CP DMA DST\n"); return -EINVAL; } @@ -1836,8 +1841,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } @@ -1848,7 +1853,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } case PACKET3_SURFACE_SYNC: if (pkt->count != 3) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } /* 0xffffffff/0x0 is flush all cache flag */ @@ -1856,7 +1861,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, radeon_get_ib_value(p, idx + 2) != 0) { r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1864,7 +1869,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_EVENT_WRITE: if (pkt->count != 2 && pkt->count != 0) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } if (pkt->count) { @@ -1872,7 +1877,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } offset = reloc->gpu_offset + @@ -1888,12 +1893,12 @@ static int r600_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 4) { - DRM_ERROR("bad EVENT_WRITE_EOP\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOP\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } @@ -1911,7 +1916,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) || (start_reg >= PACKET3_SET_CONFIG_REG_END) || (end_reg >= PACKET3_SET_CONFIG_REG_END)) 
{ - DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONFIG_REG\n"); return -EINVAL; } for (i = 0; i < pkt->count; i++) { @@ -1927,7 +1932,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) || (start_reg >= PACKET3_SET_CONTEXT_REG_END) || (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONTEXT_REG\n"); return -EINVAL; } for (i = 0; i < pkt->count; i++) { @@ -1939,7 +1944,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_SET_RESOURCE: if (pkt->count % 7) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_OFFSET; @@ -1947,7 +1952,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) || (start_reg >= PACKET3_SET_RESOURCE_END) || (end_reg >= PACKET3_SET_RESOURCE_END)) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } for (i = 0; i < (pkt->count / 7); i++) { @@ -1959,7 +1964,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* tex base */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } base_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1973,7 +1978,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* tex mip base */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } mip_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1994,15 +1999,15 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* vtx base */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1+(i*7)+0); size = radeon_get_ib_value(p, idx+1+(i*7)+1) + 1; if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { /* force size to size of the buffer */ - dev_warn(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n", - size + offset, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n", + size + offset, radeon_bo_size(reloc->robj)); ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset; } @@ -2015,7 +2020,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, case SQ_TEX_VTX_INVALID_TEXTURE: case SQ_TEX_VTX_INVALID_BUFFER: default: - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } } @@ -2027,7 +2032,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) || (start_reg >= PACKET3_SET_ALU_CONST_END) || (end_reg >= PACKET3_SET_ALU_CONST_END)) { - DRM_ERROR("bad SET_ALU_CONST\n"); + dev_warn_once(p->dev, "bad SET_ALU_CONST\n"); return -EINVAL; } } @@ -2038,7 +2043,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) || (start_reg >= PACKET3_SET_BOOL_CONST_END) || (end_reg >= PACKET3_SET_BOOL_CONST_END)) { - DRM_ERROR("bad SET_BOOL_CONST\n"); + dev_warn_once(p->dev, "bad SET_BOOL_CONST\n"); return -EINVAL; } break; @@ -2048,7 +2053,7 @@ static int 
r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) || (start_reg >= PACKET3_SET_LOOP_CONST_END) || (end_reg >= PACKET3_SET_LOOP_CONST_END)) { - DRM_ERROR("bad SET_LOOP_CONST\n"); + dev_warn_once(p->dev, "bad SET_LOOP_CONST\n"); return -EINVAL; } break; @@ -2058,13 +2063,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) || (start_reg >= PACKET3_SET_CTL_CONST_END) || (end_reg >= PACKET3_SET_CTL_CONST_END)) { - DRM_ERROR("bad SET_CTL_CONST\n"); + dev_warn_once(p->dev, "bad SET_CTL_CONST\n"); return -EINVAL; } break; case PACKET3_SET_SAMPLER: if (pkt->count % 3) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_OFFSET; @@ -2072,22 +2077,22 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) || (start_reg >= PACKET3_SET_SAMPLER_END) || (end_reg >= PACKET3_SET_SAMPLER_END)) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } break; case PACKET3_STRMOUT_BASE_UPDATE: /* RS780 and RS880 also need this */ if (p->family < CHIP_RS780) { - DRM_ERROR("STRMOUT_BASE_UPDATE only supported on 7xx\n"); + dev_warn_once(p->dev, "STRMOUT_BASE_UPDATE only supported on 7xx\n"); return -EINVAL; } if (pkt->count != 1) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE packet count\n"); + dev_warn_once(p->dev, "bad STRMOUT_BASE_UPDATE packet count\n"); return -EINVAL; } if (idx_value > 3) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE index\n"); + dev_warn_once(p->dev, "bad STRMOUT_BASE_UPDATE index\n"); return -EINVAL; } { @@ -2095,25 +2100,27 @@ static int r600_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE reloc\n"); + dev_warn_once(p->dev, "bad STRMOUT_BASE_UPDATE reloc\n"); return -EINVAL; } if (reloc->robj != track->vgt_strmout_bo[idx_value]) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE, bo does not match\n"); + dev_warn_once(p->dev, "bad STRMOUT_BASE_UPDATE, bo does not match\n"); return -EINVAL; } offset = (u64)radeon_get_ib_value(p, idx+1) << 8; if (offset != track->vgt_strmout_bo_offset[idx_value]) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE, bo offset does not match: 0x%llx, 0x%x\n", - offset, track->vgt_strmout_bo_offset[idx_value]); + dev_warn_once(p->dev, + "bad STRMOUT_BASE_UPDATE, bo offset does not match: 0x%llx, 0x%x\n", + offset, track->vgt_strmout_bo_offset[idx_value]); return -EINVAL; } if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, + "bad STRMOUT_BASE_UPDATE bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -2121,17 +2128,17 @@ static int r600_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_SURFACE_BASE_UPDATE: if (p->family >= CHIP_RV770 || p->family == CHIP_R600) { - DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + dev_warn_once(p->dev, "bad SURFACE_BASE_UPDATE\n"); return -EINVAL; } if (pkt->count) { - DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + dev_warn_once(p->dev, "bad SURFACE_BASE_UPDATE\n"); return -EINVAL; } break; case PACKET3_STRMOUT_BUFFER_UPDATE: if (pkt->count != 4) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); + dev_warn_once(p->dev, "bad 
STRMOUT_BUFFER_UPDATE (invalid count)\n"); return -EINVAL; } /* Updating memory at DST_ADDRESS. */ @@ -2139,14 +2146,15 @@ static int r600_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, + "bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2158,14 +2166,15 @@ static int r600_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, + "bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2178,23 +2187,23 @@ static int r600_packet3_check(struct radeon_cs_parser *p, u64 offset; if (pkt->count != 3) { - DRM_ERROR("bad MEM_WRITE (invalid count)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (invalid count)\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (missing reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+0); offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; if (offset & 0x7) { - DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (address not qwords aligned)\n"); return -EINVAL; } if ((offset + 8) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", - offset + 8, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2204,7 +2213,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } case PACKET3_COPY_DW: if (pkt->count != 4) { - DRM_ERROR("bad COPY_DW (invalid count)\n"); + dev_warn_once(p->dev, "bad COPY_DW (invalid count)\n"); return -EINVAL; } if (idx_value & 0x1) { @@ -2212,14 +2221,14 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* SRC is memory. 
*/ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad COPY_DW (missing src reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2236,14 +2245,14 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* DST is memory. */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2259,7 +2268,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -2306,7 +2315,7 @@ int r600_cs_parse(struct radeon_cs_parser *p) r = r600_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", pkt.type); kfree(p->track); p->track = NULL; return -EINVAL; @@ -2346,13 +2355,13 @@ int r600_dma_cs_next_reloc(struct radeon_cs_parser *p, *cs_reloc = NULL; if (p->chunk_relocs == NULL) { - DRM_ERROR("No relocation chunk !\n"); + dev_warn_once(p->dev, "No relocation chunk !\n"); return -EINVAL; } idx = p->dma_reloc_idx; if (idx >= p->nrelocs) { - DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", - idx, p->nrelocs); + dev_warn_once(p->dev, "Relocs at %d after relocations chunk end %d !\n", + idx, p->nrelocs); return -EINVAL; } *cs_reloc = &p->relocs[idx]; @@ -2385,8 +2394,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) do { if (p->idx >= ib_chunk->length_dw) { - DRM_ERROR("Can not parse packet at %d after CS end %d !\n", - p->idx, ib_chunk->length_dw); + dev_warn_once(p->dev, "Can not parse packet at %d after CS end %d !\n", + p->idx, ib_chunk->length_dw); return -EINVAL; } idx = p->idx; @@ -2399,7 +2408,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) case DMA_PACKET_WRITE: r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_WRITE\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE\n"); return -EINVAL; } if (tiled) { @@ -2417,20 +2426,20 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) p->idx += count + 3; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA write buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; case DMA_PACKET_COPY: r = r600_dma_cs_next_reloc(p, &src_reloc); if (r) { - 
DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } if (tiled) { @@ -2484,31 +2493,31 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) } } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA copy src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA write dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; case DMA_PACKET_CONSTANT_FILL: if (p->family < CHIP_RV770) { - DRM_ERROR("Constant Fill is 7xx only !\n"); + dev_warn_once(p->dev, "Constant Fill is 7xx only !\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_WRITE\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); @@ -2519,7 +2528,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) p->idx += 1; break; default: - DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + dev_warn_once(p->dev, "Unknown packet type %d at %d !\n", cmd, idx); return -EINVAL; } } while (p->idx < p->chunk_ib->length_dw); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index b8e6202f1d5b..3f9c0011244f 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -834,7 +834,7 @@ void radeon_cs_dump_packet(struct radeon_cs_parser *p, ib = p->ib.ptr; idx = pkt->idx; for (i = 0; i <= (pkt->count + 1); i++, idx++) - DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]); + dev_dbg(p->dev, "ib[%d]=0x%08X\n", idx, ib[idx]); } /** diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 7a3e510327b7..9e35b14e2bf0 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -554,7 +554,7 @@ int radeon_wb_init(struct radeon_device *rdev) * cover the whole aperture even if VRAM size is inferior to aperture size * Novell bug 204882 + along with lots of ubuntu ones * - * Note 3: when limiting vram it's safe to overwritte real_vram_size because + * Note 3: when limiting vram it's safe to overwrite real_vram_size because * we are not in case where real_vram_size is inferior to mc_vram_size (ie * not affected by bogus hw of Novell bug 204882 + along with lots of ubuntu * ones) @@ -562,7 +562,7 @@ int radeon_wb_init(struct radeon_device *rdev) * Note 4: IGP TOM addr should be the same as the aperture addr, we don't * 
explicitly check for that thought. * - * FIXME: when reducing VRAM size align new size on power of 2. + * FIXME: when reducing VRAM size, align new size on power of 2. */ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base) { diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 4dc77c398617..351b9dfcdad8 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -926,10 +926,10 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div, unsigned *fb_div, unsigned *ref_div) { /* limit reference * post divider to a maximum */ - ref_div_max = max(min(100 / post_div, ref_div_max), 1u); + ref_div_max = clamp(100 / post_div, 1u, ref_div_max); /* get matching reference and feedback divider */ - *ref_div = min(max(den/post_div, 1u), ref_div_max); + *ref_div = clamp(den / post_div, 1u, ref_div_max); *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den); /* limit fb divider to its maximum */ diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 4bb242437ff6..acd89a20f272 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -346,14 +346,14 @@ int radeon_gart_init(struct radeon_device *rdev) DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages); /* Allocate pages table */ - rdev->gart.pages = vzalloc(array_size(sizeof(void *), - rdev->gart.num_cpu_pages)); + rdev->gart.pages = vcalloc(rdev->gart.num_cpu_pages, + sizeof(void *)); if (rdev->gart.pages == NULL) { radeon_gart_fini(rdev); return -ENOMEM; } - rdev->gart.pages_entry = vmalloc(array_size(sizeof(uint64_t), - rdev->gart.num_gpu_pages)); + rdev->gart.pages_entry = vmalloc_array(rdev->gart.num_gpu_pages, + sizeof(uint64_t)); if (rdev->gart.pages_entry == NULL) { radeon_gart_fini(rdev); return -ENOMEM; diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index d6aa1a3012a8..d1e8b9757a65 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -136,9 +136,9 @@ static void radeon_legacy_lvds_update(struct drm_encoder *encoder, int mode) } if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } @@ -545,9 +545,9 @@ static void radeon_legacy_primary_dac_dpms(struct drm_encoder *encoder, int mode WREG32(RADEON_DAC_MACRO_CNTL, dac_macro_cntl); if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } @@ -742,9 +742,9 @@ static void radeon_legacy_tmds_int_dpms(struct drm_encoder *encoder, int mode) WREG32(RADEON_FP_GEN_CNTL, fp_gen_cntl); if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? 
true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } @@ -908,9 +908,9 @@ static void radeon_legacy_tmds_ext_dpms(struct drm_encoder *encoder, int mode) WREG32(RADEON_FP2_GEN_CNTL, fp2_gen_cntl); if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } @@ -1113,9 +1113,9 @@ static void radeon_legacy_tv_dac_dpms(struct drm_encoder *encoder, int mode) } if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index b4fb7e70320b..a855a96dd2ea 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -907,8 +907,7 @@ static void radeon_dpm_thermal_work_handler(struct work_struct *work) static bool radeon_dpm_single_display(struct radeon_device *rdev) { - bool single_display = (rdev->pm.dpm.new_active_crtc_count < 2) ? - true : false; + bool single_display = rdev->pm.dpm.new_active_crtc_count < 2; /* check if the vblank period is too short to adjust the mclk */ if (single_display && rdev->asic->dpm.vblank_too_short) { diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index c9fef9b61ced..818554e60537 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -455,7 +455,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, r = radeon_ring_lock(rdev, ringC, 64); if (r) { - DRM_ERROR("Failed to lock ring B %p\n", ringC); + DRM_ERROR("Failed to lock ring C %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); @@ -481,7 +481,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, r = radeon_ring_lock(rdev, ringC, 64); if (r) { - DRM_ERROR("Failed to lock ring B %p\n", ringC); + DRM_ERROR("Failed to lock ring C %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 2355a78e1b69..bdbc1bbe8a9b 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -86,7 +86,7 @@ int radeon_vce_init(struct radeon_device *rdev) r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); if (r) { - dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n", + dev_err(rdev->dev, "radeon_vce: can't load firmware \"%s\"\n", fw_name); return r; } @@ -126,7 +126,7 @@ int radeon_vce_init(struct radeon_device *rdev) rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); - /* we can only work with this fw version for now */ + /* we can only work with these fw versions for now */ if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 
8))) && (rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) && (rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8)))) @@ -281,7 +281,7 @@ static void radeon_vce_idle_work_handler(struct work_struct *work) * * @rdev: radeon_device pointer * - * Make sure VCE is powerd up when we want to use it + * Make sure VCE is powered up when we want to use it */ void radeon_vce_note_usage(struct radeon_device *rdev) { diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c b/drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c index af58b814e588..001b3543924a 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c @@ -1013,7 +1013,7 @@ err_reset_assert: } static const struct dev_pm_ops rcar_lvds_pm_ops = { - SET_RUNTIME_PM_OPS(rcar_lvds_runtime_suspend, rcar_lvds_runtime_resume, NULL) + RUNTIME_PM_OPS(rcar_lvds_runtime_suspend, rcar_lvds_runtime_resume, NULL) }; static struct platform_driver rcar_lvds_platform_driver = { @@ -1021,7 +1021,7 @@ static struct platform_driver rcar_lvds_platform_driver = { .remove = rcar_lvds_remove, .driver = { .name = "rcar-lvds", - .pm = &rcar_lvds_pm_ops, + .pm = pm_ptr(&rcar_lvds_pm_ops), .of_match_table = rcar_lvds_of_table, }, }; diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c index 1af4c73f7a88..5c73a513f678 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c @@ -576,7 +576,10 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi, udelay(10); rcar_mipi_dsi_clr(dsi, CLOCKSET1, CLOCKSET1_UPDATEPLL); - ppisetr = PPISETR_DLEN_3 | PPISETR_CLEN; + rcar_mipi_dsi_clr(dsi, TXSETR, TXSETR_LANECNT_MASK); + rcar_mipi_dsi_set(dsi, TXSETR, dsi->lanes - 1); + + ppisetr = ((BIT(dsi->lanes) - 1) & PPISETR_DLEN_MASK) | PPISETR_CLEN; rcar_mipi_dsi_write(dsi, PPISETR, ppisetr); rcar_mipi_dsi_set(dsi, PHYSETUP, PHYSETUP_SHUTDOWNZ); @@ -934,9 +937,234 @@ static int rcar_mipi_dsi_host_detach(struct mipi_dsi_host *host, return 0; } +static ssize_t rcar_mipi_dsi_host_tx_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg, + bool is_rx_xfer) +{ + const bool is_tx_long = mipi_dsi_packet_format_is_long(msg->type); + struct rcar_mipi_dsi *dsi = host_to_rcar_mipi_dsi(host); + struct mipi_dsi_packet packet; + u8 payload[16] = { 0 }; + u32 status; + int ret; + + ret = mipi_dsi_create_packet(&packet, msg); + if (ret) + return ret; + + /* Configure LP or HS command transfer. */ + rcar_mipi_dsi_write(dsi, TXCMSETR, (msg->flags & MIPI_DSI_MSG_USE_LPM) ? + TXCMSETR_SPDTYP : 0); + + /* Register access mode for RX transfer. */ + if (is_rx_xfer) + rcar_mipi_dsi_write(dsi, RXPSETR, 0); + + /* Do not use IRQ, poll for completion, the completion is quick. */ + rcar_mipi_dsi_write(dsi, TXCMIER, 0); + + /* + * Send the header: + * header[0] = Virtual Channel + Data Type + * header[1] = Word Count LSB (LP) or first param (SP) + * header[2] = Word Count MSB (LP) or second param (SP) + */ + rcar_mipi_dsi_write(dsi, TXCMPHDR, + (is_tx_long ? 
TXCMPHDR_FMT : 0) | + TXCMPHDR_VC(msg->channel) | + TXCMPHDR_DT(msg->type) | + TXCMPHDR_DATA1(packet.header[2]) | + TXCMPHDR_DATA0(packet.header[1])); + + if (is_tx_long) { + memcpy(payload, packet.payload, + min(msg->tx_len, sizeof(payload))); + + rcar_mipi_dsi_write(dsi, TXCMPPD0R, + (payload[3] << 24) | (payload[2] << 16) | + (payload[1] << 8) | payload[0]); + rcar_mipi_dsi_write(dsi, TXCMPPD1R, + (payload[7] << 24) | (payload[6] << 16) | + (payload[5] << 8) | payload[4]); + rcar_mipi_dsi_write(dsi, TXCMPPD2R, + (payload[11] << 24) | (payload[10] << 16) | + (payload[9] << 8) | payload[8]); + rcar_mipi_dsi_write(dsi, TXCMPPD3R, + (payload[15] << 24) | (payload[14] << 16) | + (payload[13] << 8) | payload[12]); + } + + /* Start the transfer, RX with BTA, TX without BTA. */ + if (is_rx_xfer) { + rcar_mipi_dsi_write(dsi, TXCMCR, TXCMCR_BTAREQ); + + /* Wait until the transmission, BTA, reception completed. */ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + (status & RXPSR_BTAREQEND), + 2000, 50000, false, dsi, RXPSR); + } else { + rcar_mipi_dsi_write(dsi, TXCMCR, TXCMCR_TXREQ); + + /* Wait until the transmission completed. */ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + (status & TXCMSR_TXREQEND), + 2000, 50000, false, dsi, TXCMSR); + } + + if (ret < 0) { + dev_err(dsi->dev, "Command transfer timeout (0x%08x)\n", + status); + return ret; + } + + return packet.size; +} + +static ssize_t rcar_mipi_dsi_host_rx_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct rcar_mipi_dsi *dsi = host_to_rcar_mipi_dsi(host); + u8 *rx_buf = (u8 *)(msg->rx_buf); + u32 reg, data, status, wc; + int i, ret; + + /* RX transfer received data validation and parsing starts here. */ + reg = rcar_mipi_dsi_read(dsi, TOSR); + if (reg & TOSR_TATO) { /* Turn-Around TimeOut. */ + /* Clear TATO Turn-Around TimeOut bit. */ + rcar_mipi_dsi_write(dsi, TOSR, TOSR_TATO); + return -ETIMEDOUT; + } + + reg = rcar_mipi_dsi_read(dsi, RXPSR); + + if (msg->flags & MIPI_DSI_MSG_REQ_ACK) { + /* Transfer with zero-length RX. */ + if (!(reg & RXPSR_RCVACK)) { + /* No ACK on RX response received. */ + return -EINVAL; + } + } else { + /* Transfer with non-zero-length RX. */ + if (!(reg & RXPSR_RCVRESP)) { + /* No packet header of RX response received. */ + return -EINVAL; + } + + if (reg & (RXPSR_CRCERR | RXPSR_WCERR | RXPSR_AXIERR | RXPSR_OVRERR)) { + /* Incorrect response payload. */ + return -ENODATA; + } + + data = rcar_mipi_dsi_read(dsi, RXPHDR); + if (data & RXPHDR_FMT) { /* Long Packet Response. */ + /* Read Long Packet Response length from packet header. */ + wc = data & 0xffff; + if (wc > msg->rx_len) { + dev_warn(dsi->dev, + "Long Packet Response longer than RX buffer (%d), limited to %zu Bytes\n", + wc, msg->rx_len); + wc = msg->rx_len; + } + + if (wc > 16) { + dev_warn(dsi->dev, + "Long Packet Response too long (%d), limited to 16 Bytes\n", + wc); + wc = 16; + } + + for (i = 0; i < msg->rx_len; i++) { + if (!(i % 4)) + data = rcar_mipi_dsi_read(dsi, RXPPD0R + i); + + rx_buf[i] = data & 0xff; + data >>= 8; + } + } else { /* Short Packet Response. */ + if (msg->rx_len >= 1) + rx_buf[0] = data & 0xff; + if (msg->rx_len >= 2) + rx_buf[1] = (data >> 8) & 0xff; + if (msg->rx_len >= 3) { + dev_warn(dsi->dev, + "Expected Short Packet Response too long (%zu), limited to 2 Bytes\n", + msg->rx_len); + } + } + } + + if (reg & RXPSR_RCVAKE) { + /* Acknowledge and Error report received. */ + return -EFAULT; + } + + /* Wait until the bus handover to host processor completed. 
*/ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + !(status & PPIDL0SR_DIR), + 2000, 50000, false, dsi, PPIDL0SR); + if (ret < 0) { + dev_err(dsi->dev, "Command RX DIR timeout (0x%08x)\n", status); + return ret; + } + + /* Wait until the data lane is in LP11 stop state. */ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + status & PPIDL0SR_STPST, + 2000, 50000, false, dsi, PPIDL0SR); + if (ret < 0) { + dev_err(dsi->dev, "Command RX STPST timeout (0x%08x)\n", status); + return ret; + } + + return 0; +} + +static ssize_t rcar_mipi_dsi_host_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + const bool is_rx_xfer = (msg->flags & MIPI_DSI_MSG_REQ_ACK) || msg->rx_len; + struct rcar_mipi_dsi *dsi = host_to_rcar_mipi_dsi(host); + int ret; + + if (msg->tx_len > 16 || msg->rx_len > 16) { + /* ToDo: Implement Memory on AXI bus command mode. */ + dev_warn(dsi->dev, + "Register-based command mode supports only up to 16 Bytes long payload\n"); + return -EOPNOTSUPP; + } + + ret = rcar_mipi_dsi_host_tx_transfer(host, msg, is_rx_xfer); + + /* If TX transfer succeeded and this transfer has RX part. */ + if (ret >= 0 && is_rx_xfer) { + ret = rcar_mipi_dsi_host_rx_transfer(host, msg); + if (ret) + return ret; + + ret = msg->rx_len; + } + + /* + * Wait a bit between commands, otherwise panels based on ILI9881C + * TCON may fail to correctly receive all commands sent to them. + * Until we can actually test with another DSI device, keep the + * delay here, but eventually this delay might have to be moved + * into the ILI9881C panel driver. + */ + usleep_range(1000, 2000); + + /* Clear the completion interrupt. */ + if (!msg->rx_len) + rcar_mipi_dsi_write(dsi, TXCMSR, TXCMSR_TXREQEND); + + return ret; +} + static const struct mipi_dsi_host_ops rcar_mipi_dsi_host_ops = { .attach = rcar_mipi_dsi_host_attach, .detach = rcar_mipi_dsi_host_detach, + .transfer = rcar_mipi_dsi_host_transfer }; /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h index a6b276f1d6ee..76521276e2af 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h @@ -12,6 +12,130 @@ #define LINKSR_LPBUSY (1 << 1) #define LINKSR_HSBUSY (1 << 0) +#define TXSETR 0x100 +#define TXSETR_LANECNT_MASK (0x3 << 0) + +/* + * DSI Command Transfer Registers + */ +#define TXCMSETR 0x110 +#define TXCMSETR_SPDTYP (1 << 8) /* 0:HS 1:LP */ +#define TXCMSETR_LPPDACC (1 << 0) +#define TXCMCR 0x120 +#define TXCMCR_BTATYP (1 << 2) +#define TXCMCR_BTAREQ (1 << 1) +#define TXCMCR_TXREQ (1 << 0) +#define TXCMSR 0x130 +#define TXCMSR_CLSNERR (1 << 18) +#define TXCMSR_AXIERR (1 << 16) +#define TXCMSR_TXREQEND (1 << 0) +#define TXCMSCR 0x134 +#define TXCMSCR_CLSNERR (1 << 18) +#define TXCMSCR_AXIERR (1 << 16) +#define TXCMSCR_TXREQEND (1 << 0) +#define TXCMIER 0x138 +#define TXCMIER_CLSNERR (1 << 18) +#define TXCMIER_AXIERR (1 << 16) +#define TXCMIER_TXREQEND (1 << 0) +#define TXCMADDRSET0R 0x140 +#define TXCMPHDR 0x150 +#define TXCMPHDR_FMT (1 << 24) /* 0:SP 1:LP */ +#define TXCMPHDR_VC(n) (((n) & 0x3) << 22) +#define TXCMPHDR_DT(n) (((n) & 0x3f) << 16) +#define TXCMPHDR_DATA1(n) (((n) & 0xff) << 8) +#define TXCMPHDR_DATA0(n) (((n) & 0xff) << 0) +#define TXCMPPD0R 0x160 +#define TXCMPPD1R 0x164 +#define TXCMPPD2R 0x168 +#define TXCMPPD3R 0x16c + +#define RXSETR 0x200 +#define RXSETR_CRCEN (((n) & 0xf) << 24) +#define 
RXSETR_ECCEN (((n) & 0xf) << 16) +#define RXPSETR 0x210 +#define RXPSETR_LPPDACC (1 << 0) +#define RXPSR 0x220 +#define RXPSR_ECCERR1B (1 << 28) +#define RXPSR_UEXTRGERR (1 << 25) +#define RXPSR_RESPTOERR (1 << 24) +#define RXPSR_OVRERR (1 << 23) +#define RXPSR_AXIERR (1 << 22) +#define RXPSR_CRCERR (1 << 21) +#define RXPSR_WCERR (1 << 20) +#define RXPSR_UEXDTERR (1 << 19) +#define RXPSR_UEXPKTERR (1 << 18) +#define RXPSR_ECCERR (1 << 17) +#define RXPSR_MLFERR (1 << 16) +#define RXPSR_RCVACK (1 << 14) +#define RXPSR_RCVEOT (1 << 10) +#define RXPSR_RCVAKE (1 << 9) +#define RXPSR_RCVRESP (1 << 8) +#define RXPSR_BTAREQEND (1 << 0) +#define RXPSCR 0x224 +#define RXPSCR_ECCERR1B (1 << 28) +#define RXPSCR_UEXTRGERR (1 << 25) +#define RXPSCR_RESPTOERR (1 << 24) +#define RXPSCR_OVRERR (1 << 23) +#define RXPSCR_AXIERR (1 << 22) +#define RXPSCR_CRCERR (1 << 21) +#define RXPSCR_WCERR (1 << 20) +#define RXPSCR_UEXDTERR (1 << 19) +#define RXPSCR_UEXPKTERR (1 << 18) +#define RXPSCR_ECCERR (1 << 17) +#define RXPSCR_MLFERR (1 << 16) +#define RXPSCR_RCVACK (1 << 14) +#define RXPSCR_RCVEOT (1 << 10) +#define RXPSCR_RCVAKE (1 << 9) +#define RXPSCR_RCVRESP (1 << 8) +#define RXPSCR_BTAREQEND (1 << 0) +#define RXPIER 0x228 +#define RXPIER_ECCERR1B (1 << 28) +#define RXPIER_UEXTRGERR (1 << 25) +#define RXPIER_RESPTOERR (1 << 24) +#define RXPIER_OVRERR (1 << 23) +#define RXPIER_AXIERR (1 << 22) +#define RXPIER_CRCERR (1 << 21) +#define RXPIER_WCERR (1 << 20) +#define RXPIER_UEXDTERR (1 << 19) +#define RXPIER_UEXPKTERR (1 << 18) +#define RXPIER_ECCERR (1 << 17) +#define RXPIER_MLFERR (1 << 16) +#define RXPIER_RCVACK (1 << 14) +#define RXPIER_RCVEOT (1 << 10) +#define RXPIER_RCVAKE (1 << 9) +#define RXPIER_RCVRESP (1 << 8) +#define RXPIER_BTAREQEND (1 << 0) +#define RXPADDRSET0R 0x230 +#define RXPSIZESETR 0x238 +#define RXPSIZESETR_SIZE(n) (((n) & 0xf) << 3) +#define RXPHDR 0x240 +#define RXPHDR_FMT (1 << 24) /* 0:SP 1:LP */ +#define RXPHDR_VC(n) (((n) & 0x3) << 22) +#define RXPHDR_DT(n) (((n) & 0x3f) << 16) +#define RXPHDR_DATA1(n) (((n) & 0xff) << 8) +#define RXPHDR_DATA0(n) (((n) & 0xff) << 0) +#define RXPPD0R 0x250 +#define RXPPD1R 0x254 +#define RXPPD2R 0x258 +#define RXPPD3R 0x25c +#define AKEPR 0x300 +#define AKEPR_VC(n) (((n) & 0x3) << 22) +#define AKEPR_DT(n) (((n) & 0x3f) << 16) +#define AKEPR_ERRRPT(n) (((n) & 0xffff) << 0) +#define RXRESPTOSETR 0x400 +#define TACR 0x500 +#define TASR 0x510 +#define TASCR 0x514 +#define TAIER 0x518 +#define TOSR 0x610 +#define TOSR_TATO (1 << 2) +#define TOSR_LRXHTO (1 << 1) +#define TOSR_HRXTO (1 << 0) +#define TOSCR 0x614 +#define TOSCR_TATO (1 << 2) +#define TOSCR_LRXHTO (1 << 1) +#define TOSCR_HRXTO (1 << 0) + /* * Video Mode Register */ @@ -80,10 +204,7 @@ * PHY-Protocol Interface (PPI) Registers */ #define PPISETR 0x700 -#define PPISETR_DLEN_0 (0x1 << 0) -#define PPISETR_DLEN_1 (0x3 << 0) -#define PPISETR_DLEN_2 (0x7 << 0) -#define PPISETR_DLEN_3 (0xf << 0) +#define PPISETR_DLEN_MASK (0xf << 0) #define PPISETR_CLEN (1 << 8) #define PPICLCR 0x710 @@ -100,6 +221,10 @@ #define PPICLSCR_HSTOLP (1 << 27) #define PPICLSCR_TOHS (1 << 26) +#define PPIDL0SR 0x740 +#define PPIDL0SR_DIR (1 << 10) +#define PPIDL0SR_STPST (1 << 6) + #define PPIDLSR 0x760 #define PPIDLSR_STPST (0xf << 0) diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index ab525668939a..b7b025814e72 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -10,6 +10,7 @@ config DRM_ROCKCHIP select VIDEOMODE_HELPERS select DRM_ANALOGIX_DP if 
ROCKCHIP_ANALOGIX_DP select DRM_DISPLAY_DP_AUX_BUS if ROCKCHIP_ANALOGIX_DP + select DRM_DW_DP if ROCKCHIP_DW_DP select DRM_DW_HDMI if ROCKCHIP_DW_HDMI select DRM_DW_HDMI_QP if ROCKCHIP_DW_HDMI_QP select DRM_DW_MIPI_DSI if ROCKCHIP_DW_MIPI_DSI @@ -53,6 +54,7 @@ config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions @@ -60,6 +62,14 @@ config ROCKCHIP_CDN_DP RK3399 based SoC, you should select this option. +config ROCKCHIP_DW_DP + bool "Rockchip specific extensions for Synopsys DW DP" + help + This selects support for Rockchip SoC specific extensions + to enable Synopsys DesignWare Cores based DisplayPort transmit + controller support on Rockchip SoC, If you want to enable DP on + rk3588 based SoC, you should select this option. + config ROCKCHIP_DW_HDMI bool "Rockchip specific extensions for Synopsys DW HDMI" help diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile index 2b867cebbc12..097f062399c7 100644 --- a/drivers/gpu/drm/rockchip/Makefile +++ b/drivers/gpu/drm/rockchip/Makefile @@ -14,6 +14,7 @@ rockchipdrm-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_HDMI_QP) += dw_hdmi_qp-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_MIPI_DSI2) += dw-mipi-dsi2-rockchip.o +rockchipdrm-$(CONFIG_ROCKCHIP_DW_DP) += dw_dp-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_INNO_HDMI) += inno_hdmi.o rockchipdrm-$(CONFIG_ROCKCHIP_LVDS) += rockchip_lvds.o rockchipdrm-$(CONFIG_ROCKCHIP_RGB) += rockchip_rgb.o diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c index cdd490778756..0aea764e29b2 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c @@ -437,6 +437,15 @@ static void dw_mipi_dsi2_rockchip_remove(struct platform_device *pdev) dw_mipi_dsi2_remove(dsi2->dmd); } +static const struct dsigrf_reg rk3576_dsi_grf_reg_fields[MAX_FIELDS] = { + [TXREQCLKHS_EN] = { 0x0028, 1, 1 }, + [GATING_EN] = { 0x0028, 0, 0 }, + [IPI_SHUTDN] = { 0x0028, 3, 3 }, + [IPI_COLORM] = { 0x0028, 2, 2 }, + [IPI_COLOR_DEPTH] = { 0x0028, 8, 11 }, + [IPI_FORMAT] = { 0x0028, 4, 7 }, +}; + static const struct dsigrf_reg rk3588_dsi0_grf_reg_fields[MAX_FIELDS] = { [TXREQCLKHS_EN] = { 0x0000, 11, 11 }, [GATING_EN] = { 0x0000, 10, 10 }, @@ -455,6 +464,15 @@ static const struct dsigrf_reg rk3588_dsi1_grf_reg_fields[MAX_FIELDS] = { [IPI_FORMAT] = { 0x0004, 0, 3 }, }; +static const struct rockchip_dw_dsi2_chip_data rk3576_chip_data[] = { + { + .reg = 0x27d80000, + .grf_regs = rk3576_dsi_grf_reg_fields, + .max_bit_rate_per_lane = 2500000ULL, + }, + { /* sentinel */ } +}; + static const struct rockchip_dw_dsi2_chip_data rk3588_chip_data[] = { { .reg = 0xfde20000, @@ -470,6 +488,9 @@ static const struct rockchip_dw_dsi2_chip_data rk3588_chip_data[] = { static const struct of_device_id dw_mipi_dsi2_rockchip_dt_ids[] = { { + .compatible = "rockchip,rk3576-mipi-dsi2", + .data = &rk3576_chip_data, + }, { .compatible = "rockchip,rk3588-mipi-dsi2", .data = &rk3588_chip_data, }, diff --git a/drivers/gpu/drm/rockchip/dw_dp-rockchip.c b/drivers/gpu/drm/rockchip/dw_dp-rockchip.c new file mode 100644 index 000000000000..25ab4e46301e --- /dev/null +++ b/drivers/gpu/drm/rockchip/dw_dp-rockchip.c @@ -0,0 +1,150 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Rockchip Electronics Co., Ltd. + * + * Author: Zhang Yubing <yubing.zhang@rock-chips.com> + * Author: Andy Yan <andy.yan@rock-chips.com> + */ + +#include <linux/component.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <drm/bridge/dw_dp.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_bridge_connector.h> +#include <drm/drm_of.h> +#include <drm/drm_print.h> +#include <drm/drm_probe_helper.h> +#include <drm/drm_simple_kms_helper.h> + +#include <linux/media-bus-format.h> +#include <linux/videodev2.h> + +#include "rockchip_drm_drv.h" +#include "rockchip_drm_vop.h" + +struct rockchip_dw_dp { + struct dw_dp *base; + struct device *dev; + struct rockchip_encoder encoder; +}; + +static int dw_dp_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state); + struct drm_atomic_state *state = conn_state->state; + struct drm_display_info *di = &conn_state->connector->display_info; + struct drm_bridge *bridge = drm_bridge_chain_get_first_bridge(encoder); + struct drm_bridge_state *bridge_state = drm_atomic_get_new_bridge_state(state, bridge); + u32 bus_format = bridge_state->input_bus_cfg.format; + + switch (bus_format) { + case MEDIA_BUS_FMT_UYYVYY10_0_5X30: + case MEDIA_BUS_FMT_UYYVYY8_0_5X24: + s->output_mode = ROCKCHIP_OUT_MODE_YUV420; + break; + case MEDIA_BUS_FMT_YUYV10_1X20: + case MEDIA_BUS_FMT_YUYV8_1X16: + s->output_mode = ROCKCHIP_OUT_MODE_S888_DUMMY; + break; + case MEDIA_BUS_FMT_RGB101010_1X30: + case MEDIA_BUS_FMT_RGB888_1X24: + case MEDIA_BUS_FMT_RGB666_1X24_CPADHI: + case MEDIA_BUS_FMT_YUV10_1X30: + case MEDIA_BUS_FMT_YUV8_1X24: + default: + s->output_mode = ROCKCHIP_OUT_MODE_AAAA; + break; + } + + s->output_type = DRM_MODE_CONNECTOR_DisplayPort; + s->bus_format = bus_format; + s->bus_flags = di->bus_flags; + s->color_space = V4L2_COLORSPACE_DEFAULT; + + return 0; +} + +static const struct drm_encoder_helper_funcs dw_dp_encoder_helper_funcs = { + .atomic_check = dw_dp_encoder_atomic_check, +}; + +static int dw_dp_rockchip_bind(struct device *dev, struct device *master, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_dp_plat_data plat_data; + struct drm_device *drm_dev = data; + struct rockchip_dw_dp *dp; + struct drm_encoder *encoder; + struct drm_connector *connector; + int ret; + + dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); + if (!dp) + return -ENOMEM; + + dp->dev = dev; + platform_set_drvdata(pdev, dp); + + plat_data.max_link_rate = 810000; + encoder = &dp->encoder.encoder; + encoder->possible_crtcs = drm_of_find_possible_crtcs(drm_dev, dev->of_node); + rockchip_drm_encoder_set_crtc_endpoint_id(&dp->encoder, dev->of_node, 0, 0); + + ret = drmm_encoder_init(drm_dev, encoder, NULL, DRM_MODE_ENCODER_TMDS, NULL); + if (ret) + return ret; + drm_encoder_helper_add(encoder, &dw_dp_encoder_helper_funcs); + + dp->base = dw_dp_bind(dev, encoder, &plat_data); + if (IS_ERR(dp->base)) { + ret = PTR_ERR(dp->base); + return ret; + } + + connector = drm_bridge_connector_init(drm_dev, encoder); + if (IS_ERR(connector)) { + ret = PTR_ERR(connector); + return dev_err_probe(dev, ret, "Failed to init bridge connector"); + } + + drm_connector_attach_encoder(connector, encoder); + + return 0; +} + +static const struct component_ops dw_dp_rockchip_component_ops = { + .bind = dw_dp_rockchip_bind, +}; 
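For context on dw_dp_encoder_atomic_check() above: the bus format it reads back is whatever the DRM bridge format negotiation settled on, and bridges take part in that negotiation through the atomic_get_input_bus_fmts hook of struct drm_bridge_funcs. Purely as an illustration (the bridge and function below are invented, not part of this patch), a bridge that only accepts the 8-bit YUV420 bus format would advertise it like this:

static u32 *example_bridge_get_input_bus_fmts(struct drm_bridge *bridge,
					      struct drm_bridge_state *bridge_state,
					      struct drm_crtc_state *crtc_state,
					      struct drm_connector_state *conn_state,
					      u32 output_fmt,
					      unsigned int *num_input_fmts)
{
	u32 *fmts = kmalloc(sizeof(*fmts), GFP_KERNEL);

	if (!fmts) {
		*num_input_fmts = 0;
		return NULL;
	}

	/* Encoders such as the one above would then map this format to
	 * ROCKCHIP_OUT_MODE_YUV420 in their atomic_check. */
	fmts[0] = MEDIA_BUS_FMT_UYYVYY8_0_5X24;
	*num_input_fmts = 1;

	return fmts;
}

Hooked up as .atomic_get_input_bus_fmts in the bridge's drm_bridge_funcs, the negotiated result lands in bridge_state->input_bus_cfg.format, which is exactly what the encoder helper above consumes.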
+ +static int dw_dp_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + return component_add(dev, &dw_dp_rockchip_component_ops); +} + +static void dw_dp_remove(struct platform_device *pdev) +{ + struct rockchip_dw_dp *dp = platform_get_drvdata(pdev); + + component_del(dp->dev, &dw_dp_rockchip_component_ops); +} + +static const struct of_device_id dw_dp_of_match[] = { + { .compatible = "rockchip,rk3588-dp", }, + {} +}; +MODULE_DEVICE_TABLE(of, dw_dp_of_match); + +struct platform_driver dw_dp_driver = { + .probe = dw_dp_probe, + .remove = dw_dp_remove, + .driver = { + .name = "dw-dp", + .of_match_table = dw_dp_of_match, + }, +}; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 180fad5d49ad..eb77bde9f628 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -529,6 +529,7 @@ static int __init rockchip_drm_init(void) ADD_ROCKCHIP_SUB_DRIVER(rockchip_dp_driver, CONFIG_ROCKCHIP_ANALOGIX_DP); ADD_ROCKCHIP_SUB_DRIVER(cdn_dp_driver, CONFIG_ROCKCHIP_CDN_DP); + ADD_ROCKCHIP_SUB_DRIVER(dw_dp_driver, CONFIG_ROCKCHIP_DW_DP); ADD_ROCKCHIP_SUB_DRIVER(dw_hdmi_rockchip_pltfm_driver, CONFIG_ROCKCHIP_DW_HDMI); ADD_ROCKCHIP_SUB_DRIVER(dw_hdmi_qp_rockchip_pltfm_driver, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index c183e82a42a5..2e86ad00979c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -87,6 +87,7 @@ int rockchip_drm_encoder_set_crtc_endpoint_id(struct rockchip_encoder *rencoder, struct device_node *np, int port, int reg); int rockchip_drm_endpoint_is_subdriver(struct device_node *ep); extern struct platform_driver cdn_dp_driver; +extern struct platform_driver dw_dp_driver; extern struct platform_driver dw_hdmi_rockchip_pltfm_driver; extern struct platform_driver dw_hdmi_qp_rockchip_pltfm_driver; extern struct platform_driver dw_mipi_dsi_rockchip_driver; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 186f6452a7d3..b50927a824b4 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -2579,12 +2579,13 @@ static int vop2_win_init(struct vop2 *vop2) } /* - * The window registers are only updated when config done is written. - * Until that they read back the old value. As we read-modify-write - * these registers mark them as non-volatile. This makes sure we read - * the new values from the regmap register cache. + * The window and video port registers are only updated when config + * done is written. Until that they read back the old value. As we + * read-modify-write these registers mark them as non-volatile. This + * makes sure we read the new values from the regmap register cache. 
*/ static const struct regmap_range vop2_nonvolatile_range[] = { + regmap_reg_range(RK3568_VP0_CTRL_BASE, RK3588_VP3_CTRL_BASE + 255), regmap_reg_range(0x1000, 0x23ff), }; diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 8867b95ab089..5a4697f636f2 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -285,9 +285,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) return 0; sched = entity->rq->sched; - /** - * The client will not queue more IBs during this fini, consume existing - * queued IBs or discard them on SIGKILL + /* + * The client will not queue more jobs during this fini - consume + * existing queued ones, or discard them on SIGKILL. */ if (current->flags & PF_EXITING) { if (timeout) @@ -300,7 +300,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) drm_sched_entity_is_idle(entity)); } - /* For killed process disable any more IBs enqueue right now */ + /* For a killed process disallow further enqueueing of jobs. */ last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); if ((!last_user || last_user == current->group_leader) && (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) @@ -324,9 +324,9 @@ EXPORT_SYMBOL(drm_sched_entity_flush); void drm_sched_entity_fini(struct drm_sched_entity *entity) { /* - * If consumption of existing IBs wasn't completed. Forcefully remove - * them here. Also makes sure that the scheduler won't touch this entity - * any more. + * If consumption of existing jobs wasn't completed forcefully remove + * them. Also makes sure that the scheduler won't touch this entity any + * more. */ drm_sched_entity_kill(entity); @@ -391,7 +391,8 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority); * Add a callback to the current dependency of the entity to wake up the * scheduler when the entity becomes available. 
*/ -static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) +static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity, + struct drm_sched_job *sched_job) { struct drm_gpu_scheduler *sched = entity->rq->sched; struct dma_fence *fence = entity->dependency; @@ -421,6 +422,10 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) entity->dependency = fence; } + if (trace_drm_sched_job_unschedulable_enabled() && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &entity->dependency->flags)) + trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (!dma_fence_add_callback(entity->dependency, &entity->cb, drm_sched_entity_wakeup)) return true; @@ -461,10 +466,8 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) while ((entity->dependency = drm_sched_job_dependency(sched_job, entity))) { - if (drm_sched_entity_add_dependency_cb(entity)) { - trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (drm_sched_entity_add_dependency_cb(entity, sched_job)) return NULL; - } } /* skip jobs from entity that marked guilty */ diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 5a550fd76bf0..46119aacb809 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1424,6 +1424,22 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) * Prevents reinsertion and marks job_queue as idle, * it will be removed from the rq in drm_sched_entity_fini() * eventually + * + * FIXME: + * This lacks the proper spin_lock(&s_entity->lock) and + * is, therefore, a race condition. Most notably, it + * can race with drm_sched_entity_push_job(). The lock + * cannot be taken here, however, because this would + * lead to lock inversion -> deadlock. + * + * The best solution probably is to enforce the life + * time rule of all entities having to be torn down + * before their scheduler. Then, however, locking could + * be dropped alltogether from this function. + * + * For now, this remains a potential race in all + * drivers that keep entities alive for longer than + * the scheduler. 
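The FIXME above amounts to a life-time rule for drivers: every entity has to be torn down before the scheduler it was pushed to. A hedged sketch of a driver teardown path that follows that rule (struct example_device is invented for illustration):

struct example_device {
	struct drm_gpu_scheduler sched;
	struct drm_sched_entity entity;
};

static void example_device_fini(struct example_device *edev)
{
	/* Flush or discard the entity's remaining jobs and stop it first. */
	drm_sched_entity_destroy(&edev->entity);

	/* Only when no entity can reach the scheduler any more, tear it down. */
	drm_sched_fini(&edev->sched);
}

With that ordering guaranteed, drm_sched_fini() never encounters a live entity and the locking question raised in the comment would not arise.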
*/ s_entity->stopped = true; spin_unlock(&rq->lock); diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h index 5b262126b776..7f31d35780cc 100644 --- a/drivers/gpu/drm/scheduler/tests/sched_tests.h +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h @@ -11,7 +11,6 @@ #include <linux/hrtimer.h> #include <linux/ktime.h> #include <linux/list.h> -#include <linux/atomic.h> #include <linux/mutex.h> #include <linux/types.h> diff --git a/drivers/gpu/drm/sitronix/st7571-i2c.c b/drivers/gpu/drm/sitronix/st7571-i2c.c index 453eb7e045e5..a6c4a6738ded 100644 --- a/drivers/gpu/drm/sitronix/st7571-i2c.c +++ b/drivers/gpu/drm/sitronix/st7571-i2c.c @@ -151,6 +151,7 @@ struct st7571_device { bool ignore_nak; bool grayscale; + bool inverted; u32 height_mm; u32 width_mm; u32 startline; @@ -218,10 +219,11 @@ static int st7571_send_command_list(struct st7571_device *st7571, return ret; } -static inline u8 st7571_transform_xy(const char *p, int x, int y) +static inline u8 st7571_transform_xy(const char *p, int x, int y, u8 bpp) { int xrest = x % 8; u8 result = 0; + u8 row_len = 16 * bpp; /* * Transforms an (x, y) pixel coordinate into a vertical 8-bit @@ -236,7 +238,7 @@ static inline u8 st7571_transform_xy(const char *p, int x, int y) for (int i = 0; i < 8; i++) { int row_idx = y + i; - u8 byte = p[row_idx * 16 + x]; + u8 byte = p[row_idx * row_len + x]; u8 bit = (byte >> xrest) & 1; result |= (bit << i); @@ -303,11 +305,11 @@ static void st7571_prepare_buffer_grayscale(struct st7571_device *st7571, struct iosys_map dst; switch (fb->format->format) { - case DRM_FORMAT_XRGB8888: /* Only support XRGB8888 in monochrome mode */ - dst_pitch = DIV_ROUND_UP(drm_rect_width(rect), 8); + case DRM_FORMAT_XRGB8888: + dst_pitch = DIV_ROUND_UP(drm_rect_width(rect), 4); iosys_map_set_vaddr(&dst, st7571->hwbuf); - drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state); + drm_fb_xrgb8888_to_gray2(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state); break; case DRM_FORMAT_R1: @@ -333,7 +335,7 @@ static int st7571_fb_update_rect_monochrome(struct drm_framebuffer *fb, struct d for (int y = rect->y1; y < rect->y2; y += ST7571_PAGE_HEIGHT) { for (int x = rect->x1; x < rect->x2; x++) - row[x] = st7571_transform_xy(st7571->hwbuf, x, y); + row[x] = st7571_transform_xy(st7571->hwbuf, x, y, 1); st7571_set_position(st7571, rect->x1, y); @@ -358,14 +360,13 @@ static int st7571_fb_update_rect_grayscale(struct drm_framebuffer *fb, struct dr rect->y2 = min_t(unsigned int, round_up(rect->y2, ST7571_PAGE_HEIGHT), st7571->nlines); switch (format) { - case DRM_FORMAT_XRGB8888: - /* Threated as monochrome (R1) */ - fallthrough; case DRM_FORMAT_R1: - x1 = rect->x1; - x2 = rect->x2; + x1 = rect->x1 * 1; + x2 = rect->x2 * 1; break; case DRM_FORMAT_R2: + fallthrough; + case DRM_FORMAT_XRGB8888: x1 = rect->x1 * 2; x2 = rect->x2 * 2; break; @@ -373,7 +374,7 @@ static int st7571_fb_update_rect_grayscale(struct drm_framebuffer *fb, struct dr for (int y = rect->y1; y < rect->y2; y += ST7571_PAGE_HEIGHT) { for (int x = x1; x < x2; x++) - row[x] = st7571_transform_xy(st7571->hwbuf, x, y); + row[x] = st7571_transform_xy(st7571->hwbuf, x, y, 2); st7571_set_position(st7571, rect->x1, y); @@ -386,15 +387,15 @@ static int st7571_fb_update_rect_grayscale(struct drm_framebuffer *fb, struct dr * even if the format is monochrome. 
* * The bit values maps to the following grayscale: - * 0 0 = White - * 0 1 = Light gray - * 1 0 = Dark gray - * 1 1 = Black + * 0 0 = Black + * 0 1 = Dark gray + * 1 0 = Light gray + * 1 1 = White * * For monochrome formats, write the same value twice to get * either a black or white pixel. */ - if (format == DRM_FORMAT_R1 || format == DRM_FORMAT_XRGB8888) + if (format == DRM_FORMAT_R1) regmap_bulk_write(st7571->regmap, ST7571_DATA_MODE, row + x, 1); } } @@ -792,6 +793,7 @@ static int st7567_parse_dt(struct st7571_device *st7567) of_property_read_u32(np, "width-mm", &st7567->width_mm); of_property_read_u32(np, "height-mm", &st7567->height_mm); + st7567->inverted = of_property_read_bool(np, "sitronix,inverted"); st7567->pformat = &st7571_monochrome; st7567->bpp = 1; @@ -819,6 +821,7 @@ static int st7571_parse_dt(struct st7571_device *st7571) of_property_read_u32(np, "width-mm", &st7571->width_mm); of_property_read_u32(np, "height-mm", &st7571->height_mm); st7571->grayscale = of_property_read_bool(np, "sitronix,grayscale"); + st7571->inverted = of_property_read_bool(np, "sitronix,inverted"); if (st7571->grayscale) { st7571->pformat = &st7571_grayscale; @@ -873,7 +876,7 @@ static int st7567_lcd_init(struct st7571_device *st7567) ST7571_SET_POWER(0x6), /* Power Control, VC: ON, VR: ON, VF: OFF */ ST7571_SET_POWER(0x7), /* Power Control, VC: ON, VR: ON, VF: ON */ - ST7571_SET_REVERSE(0), + ST7571_SET_REVERSE(st7567->inverted ? 1 : 0), ST7571_SET_ENTIRE_DISPLAY_ON(0), }; @@ -917,7 +920,7 @@ static int st7571_lcd_init(struct st7571_device *st7571) ST7571_SET_COLOR_MODE(st7571->pformat->mode), ST7571_COMMAND_SET_NORMAL, - ST7571_SET_REVERSE(0), + ST7571_SET_REVERSE(st7571->inverted ? 1 : 0), ST7571_SET_ENTIRE_DISPLAY_ON(0), }; @@ -1024,7 +1027,7 @@ static void st7571_remove(struct i2c_client *client) drm_dev_unplug(&st7571->dev); } -struct st7571_panel_data st7567_config = { +static const struct st7571_panel_data st7567_config = { .init = st7567_lcd_init, .parse_dt = st7567_parse_dt, .constraints = { @@ -1036,7 +1039,7 @@ struct st7571_panel_data st7567_config = { }, }; -struct st7571_panel_data st7571_config = { +static const struct st7571_panel_data st7571_config = { .init = st7571_lcd_init, .parse_dt = st7571_parse_dt, .constraints = { diff --git a/drivers/gpu/drm/solomon/ssd130x-spi.c b/drivers/gpu/drm/solomon/ssd130x-spi.c index 7c935870f7d2..b52f5fd592a1 100644 --- a/drivers/gpu/drm/solomon/ssd130x-spi.c +++ b/drivers/gpu/drm/solomon/ssd130x-spi.c @@ -74,8 +74,7 @@ static int ssd130x_spi_probe(struct spi_device *spi) t = devm_kzalloc(dev, sizeof(*t), GFP_KERNEL); if (!t) - return dev_err_probe(dev, -ENOMEM, - "Failed to allocate SPI transport data\n"); + return -ENOMEM; t->spi = spi; t->dc = dc; diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c index 03684062309b..b76606e9a82d 100644 --- a/drivers/gpu/drm/sti/sti_hqvdp.c +++ b/drivers/gpu/drm/sti/sti_hqvdp.c @@ -744,7 +744,7 @@ static bool sti_hqvdp_check_hw_scaling(struct sti_hqvdp *hqvdp, inv_zy = DIV_ROUND_UP(src_h, dst_h); - return (inv_zy <= lfw) ? 
true : false; + return inv_zy <= lfw; } /** diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c index 8ebcaf953782..ab00d1a6140c 100644 --- a/drivers/gpu/drm/stm/drv.c +++ b/drivers/gpu/drm/stm/drv.c @@ -236,8 +236,18 @@ static void stm_drm_platform_shutdown(struct platform_device *pdev) drm_atomic_helper_shutdown(platform_get_drvdata(pdev)); } +static struct ltdc_plat_data stm_drm_plat_data = { + .pad_max_freq_hz = 90000000, +}; + +static struct ltdc_plat_data stm_drm_plat_data_mp25 = { + .pad_max_freq_hz = 150000000, +}; + static const struct of_device_id drv_dt_ids[] = { - { .compatible = "st,stm32-ltdc"}, + { .compatible = "st,stm32-ltdc", .data = &stm_drm_plat_data, }, + { .compatible = "st,stm32mp251-ltdc", .data = &stm_drm_plat_data_mp25, }, + { .compatible = "st,stm32mp255-ltdc", .data = &stm_drm_plat_data_mp25, }, { /* end node */ }, }; MODULE_DEVICE_TABLE(of, drv_dt_ids); diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index ba315c66a04d..d1501e86a5b1 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/media-bus-format.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/of_graph.h> #include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> @@ -51,6 +52,7 @@ #define HWVER_10300 0x010300 #define HWVER_20101 0x020101 #define HWVER_40100 0x040100 +#define HWVER_40101 0x040101 /* * The address of some registers depends on the HW version: such registers have @@ -641,7 +643,7 @@ static inline void ltdc_set_ycbcr_config(struct drm_plane *plane, u32 drm_pix_fm break; default: /* RGB or not a YCbCr supported format */ - DRM_ERROR("Unsupported pixel format: %u\n", drm_pix_fmt); + drm_err(plane->dev, "Unsupported pixel format: %u\n", drm_pix_fmt); return; } @@ -664,18 +666,19 @@ static inline void ltdc_set_ycbcr_coeffs(struct drm_plane *plane) u32 lofs = plane->index * LAY_OFS; if (enc != DRM_COLOR_YCBCR_BT601 && enc != DRM_COLOR_YCBCR_BT709) { - DRM_ERROR("color encoding %d not supported, use bt601 by default\n", enc); + drm_err(plane->dev, "color encoding %d not supported, use bt601 by default\n", enc); /* set by default color encoding to DRM_COLOR_YCBCR_BT601 */ enc = DRM_COLOR_YCBCR_BT601; } if (ran != DRM_COLOR_YCBCR_LIMITED_RANGE && ran != DRM_COLOR_YCBCR_FULL_RANGE) { - DRM_ERROR("color range %d not supported, use limited range by default\n", ran); + drm_err(plane->dev, + "color range %d not supported, use limited range by default\n", ran); /* set by default color range to DRM_COLOR_YCBCR_LIMITED_RANGE */ ran = DRM_COLOR_YCBCR_LIMITED_RANGE; } - DRM_DEBUG_DRIVER("Color encoding=%d, range=%d\n", enc, ran); + drm_dbg_driver(plane->dev, "Color encoding=%d, range=%d\n", enc, ran); regmap_write(ldev->regmap, LTDC_L1CYR0R + lofs, ltdc_ycbcr2rgb_coeffs[enc][ran][0]); regmap_write(ldev->regmap, LTDC_L1CYR1R + lofs, @@ -774,7 +777,7 @@ static void ltdc_crtc_atomic_enable(struct drm_crtc *crtc, struct ltdc_device *ldev = crtc_to_ltdc(crtc); struct drm_device *ddev = crtc->dev; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(crtc->dev, "\n"); pm_runtime_get_sync(ddev->dev); @@ -798,7 +801,7 @@ static void ltdc_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_device *ddev = crtc->dev; int layer_index = 0; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(crtc->dev, "\n"); drm_crtc_vblank_off(crtc); @@ -835,9 +838,15 @@ ltdc_crtc_mode_valid(struct drm_crtc *crtc, int target_max = target + CLK_TOLERANCE_HZ; int result; + if (ldev->lvds_clk) { + result = 
clk_round_rate(ldev->lvds_clk, target); + drm_dbg_driver(crtc->dev, "lvds pixclk rate target %d, available %d\n", + target, result); + } + result = clk_round_rate(ldev->pixel_clk, target); - DRM_DEBUG_DRIVER("clk rate target %d, available %d\n", target, result); + drm_dbg_driver(crtc->dev, "clk rate target %d, available %d\n", target, result); /* Filter modes according to the max frequency supported by the pads */ if (result > ldev->caps.pad_max_freq_hz) @@ -872,14 +881,14 @@ static bool ltdc_crtc_mode_fixup(struct drm_crtc *crtc, int rate = mode->clock * 1000; if (clk_set_rate(ldev->pixel_clk, rate) < 0) { - DRM_ERROR("Cannot set rate (%dHz) for pixel clk\n", rate); + drm_err(crtc->dev, "Cannot set rate (%dHz) for pixel clk\n", rate); return false; } adjusted_mode->clock = clk_get_rate(ldev->pixel_clk) / 1000; - DRM_DEBUG_DRIVER("requested clock %dkHz, adjusted clock %dkHz\n", - mode->clock, adjusted_mode->clock); + drm_dbg_driver(crtc->dev, "requested clock %dkHz, adjusted clock %dkHz\n", + mode->clock, adjusted_mode->clock); return true; } @@ -934,20 +943,20 @@ static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc) if (!pm_runtime_active(ddev->dev)) { ret = pm_runtime_get_sync(ddev->dev); if (ret) { - DRM_ERROR("Failed to set mode, cannot get sync\n"); + drm_err(crtc->dev, "Failed to set mode, cannot get sync\n"); return; } } - DRM_DEBUG_DRIVER("CRTC:%d mode:%s\n", crtc->base.id, mode->name); - DRM_DEBUG_DRIVER("Video mode: %dx%d", mode->hdisplay, mode->vdisplay); - DRM_DEBUG_DRIVER(" hfp %d hbp %d hsl %d vfp %d vbp %d vsl %d\n", - mode->hsync_start - mode->hdisplay, - mode->htotal - mode->hsync_end, - mode->hsync_end - mode->hsync_start, - mode->vsync_start - mode->vdisplay, - mode->vtotal - mode->vsync_end, - mode->vsync_end - mode->vsync_start); + drm_dbg_driver(crtc->dev, "CRTC:%d mode:%s\n", crtc->base.id, mode->name); + drm_dbg_driver(crtc->dev, "Video mode: %dx%d", mode->hdisplay, mode->vdisplay); + drm_dbg_driver(crtc->dev, " hfp %d hbp %d hsl %d vfp %d vbp %d vsl %d\n", + mode->hsync_start - mode->hdisplay, + mode->htotal - mode->hsync_end, + mode->hsync_end - mode->hsync_start, + mode->vsync_start - mode->vdisplay, + mode->vtotal - mode->vsync_end, + mode->vsync_end - mode->vsync_start); /* Convert video timings to ltdc timings */ hsync = mode->hsync_end - mode->hsync_start - 1; @@ -1033,7 +1042,7 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_device *ddev = crtc->dev; struct drm_pending_vblank_event *event = crtc->state->event; - DRM_DEBUG_ATOMIC("\n"); + drm_dbg_atomic(crtc->dev, "\n"); ltdc_crtc_update_clut(crtc); @@ -1121,7 +1130,7 @@ static int ltdc_crtc_enable_vblank(struct drm_crtc *crtc) struct ltdc_device *ldev = crtc_to_ltdc(crtc); struct drm_crtc_state *state = crtc->state; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(crtc->dev, "\n"); if (state->enable) regmap_set_bits(ldev->regmap, LTDC_IER, IER_LIE); @@ -1135,7 +1144,7 @@ static void ltdc_crtc_disable_vblank(struct drm_crtc *crtc) { struct ltdc_device *ldev = crtc_to_ltdc(crtc); - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(crtc->dev, "\n"); regmap_clear_bits(ldev->regmap, LTDC_IER, IER_LIE); } @@ -1144,11 +1153,11 @@ static int ltdc_crtc_set_crc_source(struct drm_crtc *crtc, const char *source) struct ltdc_device *ldev; int ret; - DRM_DEBUG_DRIVER("\n"); - if (!crtc) return -ENODEV; + drm_dbg_driver(crtc->dev, "\n"); + ldev = crtc_to_ltdc(crtc); if (source && strcmp(source, "auto") == 0) { @@ -1168,14 +1177,14 @@ static int ltdc_crtc_set_crc_source(struct drm_crtc *crtc, const char 
*source) static int ltdc_crtc_verify_crc_source(struct drm_crtc *crtc, const char *source, size_t *values_cnt) { - DRM_DEBUG_DRIVER("\n"); - if (!crtc) return -ENODEV; + drm_dbg_driver(crtc->dev, "\n"); + if (source && strcmp(source, "auto") != 0) { - DRM_DEBUG_DRIVER("Unknown CRC source %s for %s\n", - source, crtc->name); + drm_dbg_driver(crtc->dev, "Unknown CRC source %s for %s\n", + source, crtc->name); return -EINVAL; } @@ -1233,7 +1242,7 @@ static int ltdc_plane_atomic_check(struct drm_plane *plane, struct drm_framebuffer *fb = new_plane_state->fb; u32 src_w, src_h; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(plane->dev, "\n"); if (!fb) return 0; @@ -1244,7 +1253,7 @@ static int ltdc_plane_atomic_check(struct drm_plane *plane, /* Reject scaling */ if (src_w != new_plane_state->crtc_w || src_h != new_plane_state->crtc_h) { - DRM_DEBUG_DRIVER("Scaling is not supported"); + drm_dbg_driver(plane->dev, "Scaling is not supported"); return -EINVAL; } @@ -1270,7 +1279,7 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, enum ltdc_pix_fmt pf; if (!newstate->crtc || !fb) { - DRM_DEBUG_DRIVER("fb or crtc NULL"); + drm_dbg_driver(plane->dev, "fb or crtc NULL"); return; } @@ -1280,11 +1289,11 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, src_w = newstate->src_w >> 16; src_h = newstate->src_h >> 16; - DRM_DEBUG_DRIVER("plane:%d fb:%d (%dx%d)@(%d,%d) -> (%dx%d)@(%d,%d)\n", - plane->base.id, fb->base.id, - src_w, src_h, src_x, src_y, - newstate->crtc_w, newstate->crtc_h, - newstate->crtc_x, newstate->crtc_y); + drm_dbg_driver(plane->dev, "plane:%d fb:%d (%dx%d)@(%d,%d) -> (%dx%d)@(%d,%d)\n", + plane->base.id, fb->base.id, + src_w, src_h, src_x, src_y, + newstate->crtc_w, newstate->crtc_h, + newstate->crtc_x, newstate->crtc_y); regmap_read(ldev->regmap, LTDC_BPCR, &bpcr); @@ -1312,8 +1321,8 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, val = ltdc_set_flexible_pixel_format(plane, pf); if (val == NB_PF) { - DRM_ERROR("Pixel format %.4s not supported\n", - (char *)&fb->format->format); + drm_err(fb->dev, "Pixel format %.4s not supported\n", + (char *)&fb->format->format); val = 0; /* set by default ARGB 32 bits */ } regmap_write_bits(ldev->regmap, LTDC_L1PFCR + lofs, LXPFCR_PF, val); @@ -1350,7 +1359,7 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, if (newstate->rotation & DRM_MODE_REFLECT_Y) paddr += (fb->pitches[0] * (y1 - y0)); - DRM_DEBUG_DRIVER("fb: phys 0x%08x", paddr); + drm_dbg_driver(fb->dev, "fb: phys 0x%08x", paddr); regmap_write(ldev->regmap, LTDC_L1CFBAR + lofs, paddr); /* Configures the color frame buffer pitch in bytes & line length */ @@ -1517,8 +1526,8 @@ static void ltdc_plane_atomic_disable(struct drm_plane *plane, regmap_write_bits(ldev->regmap, LTDC_L1RCR + lofs, LXRCR_IMR | LXRCR_VBR | LXRCR_GRMSK, LXRCR_VBR); - DRM_DEBUG_DRIVER("CRTC:%d plane:%d\n", - oldstate->crtc->base.id, plane->base.id); + drm_dbg_driver(plane->dev, "CRTC:%d plane:%d\n", + oldstate->crtc->base.id, plane->base.id); } static void ltdc_plane_atomic_print_state(struct drm_printer *p, @@ -1632,7 +1641,7 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev, drm_plane_create_alpha_property(plane); - DRM_DEBUG_DRIVER("plane:%d created\n", plane->base.id); + drm_dbg_driver(plane->dev, "plane:%d created\n", plane->base.id); return plane; } @@ -1647,7 +1656,7 @@ static int ltdc_crtc_init(struct drm_device *ddev, struct drm_crtc *crtc) primary = ltdc_plane_create(ddev, DRM_PLANE_TYPE_PRIMARY, 0); if (!primary) { - DRM_ERROR("Can not 
create primary plane\n"); + drm_err(ddev, "Can not create primary plane\n"); return -EINVAL; } @@ -1668,7 +1677,7 @@ static int ltdc_crtc_init(struct drm_device *ddev, struct drm_crtc *crtc) ret = drmm_crtc_init_with_planes(ddev, crtc, primary, NULL, <dc_crtc_funcs, NULL); if (ret) { - DRM_ERROR("Can not initialize CRTC\n"); + drm_err(ddev, "Can not initialize CRTC\n"); return ret; } @@ -1677,13 +1686,13 @@ static int ltdc_crtc_init(struct drm_device *ddev, struct drm_crtc *crtc) drm_mode_crtc_set_gamma_size(crtc, CLUT_SIZE); drm_crtc_enable_color_mgmt(crtc, 0, false, CLUT_SIZE); - DRM_DEBUG_DRIVER("CRTC:%d created\n", crtc->base.id); + drm_dbg_driver(ddev, "CRTC:%d created\n", crtc->base.id); /* Add planes. Note : the first layer is used by primary plane */ for (i = 1; i < ldev->caps.nb_layers; i++) { overlay = ltdc_plane_create(ddev, DRM_PLANE_TYPE_OVERLAY, i); if (!overlay) { - DRM_ERROR("Can not create overlay plane %d\n", i); + drm_err(ddev, "Can not create overlay plane %d\n", i); return -ENOMEM; } if (ldev->caps.dynamic_zorder) @@ -1704,7 +1713,7 @@ static void ltdc_encoder_disable(struct drm_encoder *encoder) struct drm_device *ddev = encoder->dev; struct ltdc_device *ldev = ddev->dev_private; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(encoder->dev, "\n"); /* Disable LTDC */ regmap_clear_bits(ldev->regmap, LTDC_GCR, GCR_LTDCEN); @@ -1718,7 +1727,7 @@ static void ltdc_encoder_enable(struct drm_encoder *encoder) struct drm_device *ddev = encoder->dev; struct ltdc_device *ldev = ddev->dev_private; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(encoder->dev, "\n"); /* set fifo underrun threshold register */ if (ldev->caps.fifo_threshold) @@ -1734,7 +1743,7 @@ static void ltdc_encoder_mode_set(struct drm_encoder *encoder, { struct drm_device *ddev = encoder->dev; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(encoder->dev, "\n"); /* * Set to default state the pinctrl only with DPI type. @@ -1770,7 +1779,7 @@ static int ltdc_encoder_init(struct drm_device *ddev, struct drm_bridge *bridge) if (ret) return ret; - DRM_DEBUG_DRIVER("Bridge encoder:%d created\n", encoder->base.id); + drm_dbg_driver(encoder->dev, "Bridge encoder:%d created\n", encoder->base.id); return 0; } @@ -1779,6 +1788,7 @@ static int ltdc_get_caps(struct drm_device *ddev) { struct ltdc_device *ldev = ddev->dev_private; u32 bus_width_log2, lcr, gc2r; + const struct ltdc_plat_data *pdata = of_device_get_match_data(ddev->dev); /* * at least 1 layer must be managed & the number of layers @@ -1794,6 +1804,8 @@ static int ltdc_get_caps(struct drm_device *ddev) ldev->caps.bus_width = 8 << bus_width_log2; regmap_read(ldev->regmap, LTDC_IDR, &ldev->caps.hw_version); + ldev->caps.pad_max_freq_hz = pdata->pad_max_freq_hz; + switch (ldev->caps.hw_version) { case HWVER_10200: case HWVER_10300: @@ -1811,7 +1823,6 @@ static int ltdc_get_caps(struct drm_device *ddev) * does not work on 2nd layer. 
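The pad_max_freq_hz cap is no longer hard-coded per hardware version here; ltdc_get_caps() now reads it from the per-compatible ltdc_plat_data attached to the of_device_id table in drv.c. A generic sketch of that match-data pattern, with all my_* names being placeholders rather than anything from this patch:

	#include <linux/mod_devicetable.h>
	#include <linux/of_device.h>
	#include <linux/platform_device.h>

	struct my_plat_data {
		int pad_max_freq_hz;	/* max pixel frequency the output pads support */
	};

	static const struct my_plat_data my_data_default = { .pad_max_freq_hz = 90000000 };
	static const struct my_plat_data my_data_mp25 = { .pad_max_freq_hz = 150000000 };

	static const struct of_device_id my_dt_ids[] = {
		{ .compatible = "vendor,my-ltdc", .data = &my_data_default },
		{ .compatible = "vendor,my-ltdc-mp25", .data = &my_data_mp25 },
		{ /* sentinel */ }
	};

	static int my_probe(struct platform_device *pdev)
	{
		/* Returns the .data pointer of the of_device_id entry that matched. */
		const struct my_plat_data *pdata = of_device_get_match_data(&pdev->dev);

		if (!pdata)
			return -ENODEV;

		/* e.g. caps.pad_max_freq_hz = pdata->pad_max_freq_hz; */
		return 0;
	}

Adding another SoC variant then only needs a new table entry and, if required, a new platform-data instance; the probe path stays unchanged.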
*/ ldev->caps.non_alpha_only_l1 = true; - ldev->caps.pad_max_freq_hz = 90000000; if (ldev->caps.hw_version == HWVER_10200) ldev->caps.pad_max_freq_hz = 65000000; ldev->caps.nb_irq = 2; @@ -1842,6 +1853,7 @@ static int ltdc_get_caps(struct drm_device *ddev) ldev->caps.fifo_threshold = false; break; case HWVER_40100: + case HWVER_40101: ldev->caps.layer_ofs = LAY_OFS_1; ldev->caps.layer_regs = ltdc_layer_regs_a2; ldev->caps.pix_fmt_hw = ltdc_pix_fmt_a2; @@ -1849,7 +1861,6 @@ static int ltdc_get_caps(struct drm_device *ddev) ldev->caps.pix_fmt_nb = ARRAY_SIZE(ltdc_drm_fmt_a2); ldev->caps.pix_fmt_flex = true; ldev->caps.non_alpha_only_l1 = false; - ldev->caps.pad_max_freq_hz = 90000000; ldev->caps.nb_irq = 2; ldev->caps.ycbcr_input = true; ldev->caps.ycbcr_output = true; @@ -1870,8 +1881,12 @@ void ltdc_suspend(struct drm_device *ddev) { struct ltdc_device *ldev = ddev->dev_private; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(ddev, "\n"); clk_disable_unprepare(ldev->pixel_clk); + if (ldev->bus_clk) + clk_disable_unprepare(ldev->bus_clk); + if (ldev->lvds_clk) + clk_disable_unprepare(ldev->lvds_clk); } int ltdc_resume(struct drm_device *ddev) @@ -1879,15 +1894,29 @@ int ltdc_resume(struct drm_device *ddev) struct ltdc_device *ldev = ddev->dev_private; int ret; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(ddev, "\n"); ret = clk_prepare_enable(ldev->pixel_clk); if (ret) { - DRM_ERROR("failed to enable pixel clock (%d)\n", ret); + drm_err(ddev, "failed to enable pixel clock (%d)\n", ret); return ret; } - return 0; + if (ldev->bus_clk) { + ret = clk_prepare_enable(ldev->bus_clk); + if (ret) { + drm_err(ddev, "failed to enable bus clock (%d)\n", ret); + return ret; + } + } + + if (ldev->lvds_clk) { + ret = clk_prepare_enable(ldev->lvds_clk); + if (ret) + drm_err(ddev, "failed to prepare lvds clock\n"); + } + + return ret; } int ltdc_load(struct drm_device *ddev) @@ -1903,7 +1932,7 @@ int ltdc_load(struct drm_device *ddev) int irq, i, nb_endpoints; int ret = -ENODEV; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(ddev, "\n"); /* Get number of endpoints */ nb_endpoints = of_graph_get_endpoint_count(np); @@ -1913,15 +1942,29 @@ int ltdc_load(struct drm_device *ddev) ldev->pixel_clk = devm_clk_get(dev, "lcd"); if (IS_ERR(ldev->pixel_clk)) { if (PTR_ERR(ldev->pixel_clk) != -EPROBE_DEFER) - DRM_ERROR("Unable to get lcd clock\n"); + drm_err(ddev, "Unable to get lcd clock\n"); return PTR_ERR(ldev->pixel_clk); } if (clk_prepare_enable(ldev->pixel_clk)) { - DRM_ERROR("Unable to prepare pixel clock\n"); + drm_err(ddev, "Unable to prepare pixel clock\n"); return -ENODEV; } + if (of_device_is_compatible(np, "st,stm32mp251-ltdc") || + of_device_is_compatible(np, "st,stm32mp255-ltdc")) { + ldev->bus_clk = devm_clk_get(dev, "bus"); + if (IS_ERR(ldev->bus_clk)) + return dev_err_probe(dev, PTR_ERR(ldev->bus_clk), + "Unable to get bus clock\n"); + + ret = clk_prepare_enable(ldev->bus_clk); + if (ret) { + drm_err(ddev, "Unable to prepare bus clock\n"); + return ret; + } + } + /* Get endpoints if any */ for (i = 0; i < nb_endpoints; i++) { ret = drm_of_find_panel_or_bridge(np, 0, i, &panel, &bridge); @@ -1939,7 +1982,7 @@ int ltdc_load(struct drm_device *ddev) if (panel) { bridge = drmm_panel_bridge_add(ddev, panel); if (IS_ERR(bridge)) { - DRM_ERROR("panel-bridge endpoint %d\n", i); + drm_err(ddev, "panel-bridge endpoint %d\n", i); ret = PTR_ERR(bridge); goto err; } @@ -1949,12 +1992,16 @@ int ltdc_load(struct drm_device *ddev) ret = ltdc_encoder_init(ddev, bridge); if (ret) { if (ret != -EPROBE_DEFER) - DRM_ERROR("init 
encoder endpoint %d\n", i); + drm_err(ddev, "init encoder endpoint %d\n", i); goto err; } } } + ldev->lvds_clk = devm_clk_get(dev, "lvds"); + if (IS_ERR(ldev->lvds_clk)) + ldev->lvds_clk = NULL; + rstc = devm_reset_control_get_exclusive(dev, NULL); mutex_init(&ldev->err_lock); @@ -1967,29 +2014,29 @@ int ltdc_load(struct drm_device *ddev) ldev->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ldev->regs)) { - DRM_ERROR("Unable to get ltdc registers\n"); + drm_err(ddev, "Unable to get ltdc registers\n"); ret = PTR_ERR(ldev->regs); goto err; } ldev->regmap = devm_regmap_init_mmio(&pdev->dev, ldev->regs, &stm32_ltdc_regmap_cfg); if (IS_ERR(ldev->regmap)) { - DRM_ERROR("Unable to regmap ltdc registers\n"); + drm_err(ddev, "Unable to regmap ltdc registers\n"); ret = PTR_ERR(ldev->regmap); goto err; } ret = ltdc_get_caps(ddev); if (ret) { - DRM_ERROR("hardware identifier (0x%08x) not supported!\n", - ldev->caps.hw_version); + drm_err(ddev, "hardware identifier (0x%08x) not supported!\n", + ldev->caps.hw_version); goto err; } /* Disable all interrupts */ regmap_clear_bits(ldev->regmap, LTDC_IER, IER_MASK); - DRM_DEBUG_DRIVER("ltdc hw version 0x%08x\n", ldev->caps.hw_version); + drm_dbg_driver(ddev, "ltdc hw version 0x%08x\n", ldev->caps.hw_version); /* initialize default value for fifo underrun threshold & clear interrupt error counters */ ldev->transfer_err = 0; @@ -2008,32 +2055,35 @@ int ltdc_load(struct drm_device *ddev) ltdc_irq_thread, IRQF_ONESHOT, dev_name(dev), ddev); if (ret) { - DRM_ERROR("Failed to register LTDC interrupt\n"); + drm_err(ddev, "Failed to register LTDC interrupt\n"); goto err; } } crtc = drmm_kzalloc(ddev, sizeof(*crtc), GFP_KERNEL); if (!crtc) { - DRM_ERROR("Failed to allocate crtc\n"); + drm_err(ddev, "Failed to allocate crtc\n"); ret = -ENOMEM; goto err; } ret = ltdc_crtc_init(ddev, crtc); if (ret) { - DRM_ERROR("Failed to init crtc\n"); + drm_err(ddev, "Failed to init crtc\n"); goto err; } ret = drm_vblank_init(ddev, NB_CRTC); if (ret) { - DRM_ERROR("Failed calling drm_vblank_init()\n"); + drm_err(ddev, "Failed calling drm_vblank_init()\n"); goto err; } clk_disable_unprepare(ldev->pixel_clk); + if (ldev->bus_clk) + clk_disable_unprepare(ldev->bus_clk); + pinctrl_pm_select_sleep_state(ddev->dev); pm_runtime_enable(ddev->dev); @@ -2042,12 +2092,15 @@ int ltdc_load(struct drm_device *ddev) err: clk_disable_unprepare(ldev->pixel_clk); + if (ldev->bus_clk) + clk_disable_unprepare(ldev->bus_clk); + return ret; } void ltdc_unload(struct drm_device *ddev) { - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(ddev, "\n"); pm_runtime_disable(ddev->dev); } diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h index 9d488043ffdb..17b51a7ce28e 100644 --- a/drivers/gpu/drm/stm/ltdc.h +++ b/drivers/gpu/drm/stm/ltdc.h @@ -40,10 +40,16 @@ struct fps_info { ktime_t last_timestamp; }; +struct ltdc_plat_data { + int pad_max_freq_hz; /* max frequency supported by pad */ +}; + struct ltdc_device { void __iomem *regs; struct regmap *regmap; struct clk *pixel_clk; /* lcd pixel clock */ + struct clk *lvds_clk; /* lvds pixel clock */ + struct clk *bus_clk; /* bus clock */ struct mutex err_lock; /* protecting error_status */ struct ltdc_caps caps; u32 irq_status; diff --git a/drivers/gpu/drm/sysfb/drm_sysfb_helper.h b/drivers/gpu/drm/sysfb/drm_sysfb_helper.h index 1424b63dde99..89633e30ca62 100644 --- a/drivers/gpu/drm/sysfb/drm_sysfb_helper.h +++ b/drivers/gpu/drm/sysfb/drm_sysfb_helper.h @@ -132,7 +132,7 @@ int drm_sysfb_plane_helper_get_scanout_buffer(struct drm_plane 
*plane, struct drm_sysfb_crtc_state { struct drm_crtc_state base; - /* Primary-plane format; required for color mgmt. */ + /* CRTC input color format; required for color mgmt. */ const struct drm_format_info *format; }; diff --git a/drivers/gpu/drm/sysfb/drm_sysfb_modeset.c b/drivers/gpu/drm/sysfb/drm_sysfb_modeset.c index 1bcdb5ee8f09..ddb4a7523ee6 100644 --- a/drivers/gpu/drm/sysfb/drm_sysfb_modeset.c +++ b/drivers/gpu/drm/sysfb/drm_sysfb_modeset.c @@ -210,7 +210,12 @@ int drm_sysfb_plane_helper_atomic_check(struct drm_plane *plane, else if (!new_plane_state->visible) return 0; - if (new_fb->format != sysfb->fb_format) { + new_crtc_state = drm_atomic_get_new_crtc_state(new_state, new_plane_state->crtc); + + new_sysfb_crtc_state = to_drm_sysfb_crtc_state(new_crtc_state); + new_sysfb_crtc_state->format = sysfb->fb_format; + + if (new_fb->format != new_sysfb_crtc_state->format) { void *buf; /* format conversion necessary; reserve buffer */ @@ -220,11 +225,6 @@ int drm_sysfb_plane_helper_atomic_check(struct drm_plane *plane, return -ENOMEM; } - new_crtc_state = drm_atomic_get_new_crtc_state(new_state, new_plane_state->crtc); - - new_sysfb_crtc_state = to_drm_sysfb_crtc_state(new_crtc_state); - new_sysfb_crtc_state->format = new_fb->format; - return 0; } EXPORT_SYMBOL(drm_sysfb_plane_helper_atomic_check); @@ -238,7 +238,9 @@ void drm_sysfb_plane_helper_atomic_update(struct drm_plane *plane, struct drm_at struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; unsigned int dst_pitch = sysfb->fb_pitch; - const struct drm_format_info *dst_format = sysfb->fb_format; + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, plane_state->crtc); + struct drm_sysfb_crtc_state *sysfb_crtc_state = to_drm_sysfb_crtc_state(crtc_state); + const struct drm_format_info *dst_format = sysfb_crtc_state->format; struct drm_atomic_helper_damage_iter iter; struct drm_rect damage; int ret, idx; @@ -370,16 +372,19 @@ EXPORT_SYMBOL(drm_sysfb_crtc_helper_atomic_check); void drm_sysfb_crtc_reset(struct drm_crtc *crtc) { + struct drm_sysfb_device *sysfb = to_drm_sysfb_device(crtc->dev); struct drm_sysfb_crtc_state *sysfb_crtc_state; if (crtc->state) drm_sysfb_crtc_state_destroy(to_drm_sysfb_crtc_state(crtc->state)); sysfb_crtc_state = kzalloc(sizeof(*sysfb_crtc_state), GFP_KERNEL); - if (sysfb_crtc_state) + if (sysfb_crtc_state) { + sysfb_crtc_state->format = sysfb->fb_format; __drm_atomic_helper_crtc_reset(crtc, &sysfb_crtc_state->base); - else + } else { __drm_atomic_helper_crtc_reset(crtc, NULL); + } } EXPORT_SYMBOL(drm_sysfb_crtc_reset); diff --git a/drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c b/drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c index 0b3fb874a51f..885864168c54 100644 --- a/drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c +++ b/drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c @@ -79,22 +79,19 @@ const struct drm_format_info *drm_sysfb_get_format_si(struct drm_device *dev, const struct screen_info *si) { const struct drm_format_info *format = NULL; - u32 bits_per_pixel; + struct pixel_format pixel; size_t i; + int ret; - bits_per_pixel = __screen_info_lfb_bits_per_pixel(si); + ret = screen_info_pixel_format(si, &pixel); + if (ret) + return NULL; for (i = 0; i < nformats; ++i) { - const struct pixel_format *f = &formats[i].pixel; - - if (bits_per_pixel == f->bits_per_pixel && - si->red_size == f->red.length && - si->red_pos == f->red.offset && - si->green_size == f->green.length && - si->green_pos == 
f->green.offset && - si->blue_size == f->blue.length && - si->blue_pos == f->blue.offset) { - format = drm_format_info(formats[i].fourcc); + const struct drm_sysfb_format *f = &formats[i]; + + if (pixel_format_equal(&pixel, &f->pixel)) { + format = drm_format_info(f->fourcc); break; } } diff --git a/drivers/gpu/drm/sysfb/vesadrm.c b/drivers/gpu/drm/sysfb/vesadrm.c index 90615e9ac86b..16a4b52d45c6 100644 --- a/drivers/gpu/drm/sysfb/vesadrm.c +++ b/drivers/gpu/drm/sysfb/vesadrm.c @@ -46,6 +46,7 @@ static const struct drm_format_info *vesadrm_get_format_si(struct drm_device *de { PIXEL_FORMAT_RGB888, DRM_FORMAT_RGB888, }, { PIXEL_FORMAT_XRGB8888, DRM_FORMAT_XRGB8888, }, { PIXEL_FORMAT_XBGR8888, DRM_FORMAT_XBGR8888, }, + { PIXEL_FORMAT_C8, DRM_FORMAT_C8, }, }; return drm_sysfb_get_format_si(dev, formats, ARRAY_SIZE(formats), si); @@ -82,7 +83,7 @@ static struct vesadrm_device *to_vesadrm_device(struct drm_device *dev) } /* - * Palette + * Color LUT */ static void vesadrm_vga_cmap_write(struct vesadrm_device *vesa, unsigned int index, @@ -128,7 +129,7 @@ static void vesadrm_pmi_cmap_write(struct vesadrm_device *vesa, unsigned int ind } #endif -static void vesadrm_set_gamma_lut(struct drm_crtc *crtc, unsigned int index, +static void vesadrm_set_color_lut(struct drm_crtc *crtc, unsigned int index, u16 red, u16 green, u16 blue) { struct drm_device *dev = crtc->dev; @@ -149,15 +150,15 @@ static void vesadrm_fill_gamma_lut(struct vesadrm_device *vesa, switch (format->format) { case DRM_FORMAT_XRGB1555: - drm_crtc_fill_gamma_555(crtc, vesadrm_set_gamma_lut); + drm_crtc_fill_gamma_555(crtc, vesadrm_set_color_lut); break; case DRM_FORMAT_RGB565: - drm_crtc_fill_gamma_565(crtc, vesadrm_set_gamma_lut); + drm_crtc_fill_gamma_565(crtc, vesadrm_set_color_lut); break; case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: case DRM_FORMAT_BGRX8888: - drm_crtc_fill_gamma_888(crtc, vesadrm_set_gamma_lut); + drm_crtc_fill_gamma_888(crtc, vesadrm_set_color_lut); break; default: drm_warn_once(dev, "Unsupported format %p4cc for gamma correction\n", @@ -175,15 +176,53 @@ static void vesadrm_load_gamma_lut(struct vesadrm_device *vesa, switch (format->format) { case DRM_FORMAT_XRGB1555: - drm_crtc_load_gamma_555_from_888(crtc, lut, vesadrm_set_gamma_lut); + drm_crtc_load_gamma_555_from_888(crtc, lut, vesadrm_set_color_lut); break; case DRM_FORMAT_RGB565: - drm_crtc_load_gamma_565_from_888(crtc, lut, vesadrm_set_gamma_lut); + drm_crtc_load_gamma_565_from_888(crtc, lut, vesadrm_set_color_lut); break; case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: case DRM_FORMAT_BGRX8888: - drm_crtc_load_gamma_888(crtc, lut, vesadrm_set_gamma_lut); + drm_crtc_load_gamma_888(crtc, lut, vesadrm_set_color_lut); + break; + default: + drm_warn_once(dev, "Unsupported format %p4cc for gamma correction\n", + &format->format); + break; + } +} + +static void vesadrm_fill_palette_lut(struct vesadrm_device *vesa, + const struct drm_format_info *format) +{ + struct drm_device *dev = &vesa->sysfb.dev; + struct drm_crtc *crtc = &vesa->crtc; + + switch (format->format) { + case DRM_FORMAT_C8: + drm_crtc_fill_palette_8(crtc, vesadrm_set_color_lut); + break; + case DRM_FORMAT_RGB332: + drm_crtc_fill_palette_332(crtc, vesadrm_set_color_lut); + break; + default: + drm_warn_once(dev, "Unsupported format %p4cc for palette\n", + &format->format); + break; + } +} + +static void vesadrm_load_palette_lut(struct vesadrm_device *vesa, + const struct drm_format_info *format, + struct drm_color_lut *lut) +{ + struct drm_device *dev = &vesa->sysfb.dev; + struct 
drm_crtc *crtc = &vesa->crtc; + + switch (format->format) { + case DRM_FORMAT_C8: + drm_crtc_load_palette_8(crtc, lut, vesadrm_set_color_lut); break; default: drm_warn_once(dev, "Unsupported format %p4cc for gamma correction\n", @@ -200,8 +239,67 @@ static const u64 vesadrm_primary_plane_format_modifiers[] = { DRM_SYSFB_PLANE_FORMAT_MODIFIERS, }; +static int vesadrm_primary_plane_helper_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *new_state) +{ + struct drm_sysfb_device *sysfb = to_drm_sysfb_device(plane->dev); + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(new_state, plane); + struct drm_framebuffer *new_fb = new_plane_state->fb; + struct drm_crtc_state *new_crtc_state; + struct drm_sysfb_crtc_state *new_sysfb_crtc_state; + int ret; + + ret = drm_sysfb_plane_helper_atomic_check(plane, new_state); + if (ret) + return ret; + else if (!new_plane_state->visible) + return 0; + + /* + * Fix up format conversion for specific cases + */ + + switch (sysfb->fb_format->format) { + case DRM_FORMAT_C8: + new_crtc_state = drm_atomic_get_new_crtc_state(new_state, new_plane_state->crtc); + new_sysfb_crtc_state = to_drm_sysfb_crtc_state(new_crtc_state); + + switch (new_fb->format->format) { + case DRM_FORMAT_XRGB8888: + /* + * Reduce XRGB8888 to RGB332. Each resulting pixel is an index + * into the C8 hardware palette, which stores RGB332 colors. + */ + if (new_sysfb_crtc_state->format->format != DRM_FORMAT_RGB332) { + new_sysfb_crtc_state->format = + drm_format_info(DRM_FORMAT_RGB332); + new_crtc_state->color_mgmt_changed = true; + } + break; + case DRM_FORMAT_C8: + /* + * Restore original output. Emulation of XRGB8888 set RBG332 + * output format and hardware palette. This needs to be undone + * when we switch back to DRM_FORMAT_C8. + */ + if (new_sysfb_crtc_state->format->format == DRM_FORMAT_RGB332) { + new_sysfb_crtc_state->format = sysfb->fb_format; + new_crtc_state->color_mgmt_changed = true; + } + break; + } + break; + } + + return 0; +} + static const struct drm_plane_helper_funcs vesadrm_primary_plane_helper_funcs = { - DRM_SYSFB_PLANE_HELPER_FUNCS, + DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, + .atomic_check = vesadrm_primary_plane_helper_atomic_check, + .atomic_update = drm_sysfb_plane_helper_atomic_update, + .atomic_disable = drm_sysfb_plane_helper_atomic_disable, + .get_scanout_buffer = drm_sysfb_plane_helper_get_scanout_buffer, }; static const struct drm_plane_funcs vesadrm_primary_plane_funcs = { @@ -223,15 +321,36 @@ static void vesadrm_crtc_helper_atomic_flush(struct drm_crtc *crtc, * plane's color format. 
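The C8 emulation above works because every RGB332 index expands to a well-defined full-intensity palette entry. A rough sketch of that expansion; the rgb332_index_to_rgb888() name is made up here, and the drm_crtc_fill_palette_332() helper used earlier in this patch may round the components slightly differently:

	#include <linux/types.h>

	/*
	 * Expand an RGB332 index (3 bits red, 3 bits green, 2 bits blue) into
	 * 8-bit-per-channel color, roughly what the C8 hardware palette must
	 * contain so that XRGB8888 frames reduced to RGB332 indices keep
	 * their intended colors.
	 */
	static void rgb332_index_to_rgb888(u8 index, u8 *r, u8 *g, u8 *b)
	{
		u8 r3 = (index >> 5) & 0x7;
		u8 g3 = (index >> 2) & 0x7;
		u8 b2 = index & 0x3;

		*r = (r3 * 255 + 3) / 7;	/* 0..7 -> 0..255 */
		*g = (g3 * 255 + 3) / 7;	/* 0..7 -> 0..255 */
		*b = (b2 * 255 + 1) / 3;	/* 0..3 -> 0..255 */
	}

Filling all 256 palette slots this way gives the RGB332-reduced framebuffer its expected colors, while native C8 userspace keeps loading the palette through the gamma LUT as in the flush handler below.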
*/ if (crtc_state->enable && crtc_state->color_mgmt_changed) { - if (sysfb_crtc_state->format == sysfb->fb_format) { - if (crtc_state->gamma_lut) - vesadrm_load_gamma_lut(vesa, - sysfb_crtc_state->format, - crtc_state->gamma_lut->data); - else + switch (sysfb->fb_format->format) { + /* + * Index formats + */ + case DRM_FORMAT_C8: + if (sysfb_crtc_state->format->format == DRM_FORMAT_RGB332) { + vesadrm_fill_palette_lut(vesa, sysfb_crtc_state->format); + } else if (crtc->state->gamma_lut) { + vesadrm_load_palette_lut(vesa, + sysfb_crtc_state->format, + crtc_state->gamma_lut->data); + } else { + vesadrm_fill_palette_lut(vesa, sysfb_crtc_state->format); + } + break; + /* + * Component formats + */ + default: + if (sysfb_crtc_state->format == sysfb->fb_format) { + if (crtc_state->gamma_lut) + vesadrm_load_gamma_lut(vesa, + sysfb_crtc_state->format, + crtc_state->gamma_lut->data); + else + vesadrm_fill_gamma_lut(vesa, sysfb_crtc_state->format); + } else { vesadrm_fill_gamma_lut(vesa, sysfb_crtc_state->format); - } else { - vesadrm_fill_gamma_lut(vesa, sysfb_crtc_state->format); + } + break; } } } diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 41a285ec889f..8ede07fb7a21 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -526,7 +526,7 @@ void tegra_bo_free_object(struct drm_gem_object *gem) if (drm_gem_is_imported(gem)) { dma_buf_unmap_attachment_unlocked(gem->import_attach, bo->sgt, DMA_TO_DEVICE); - dma_buf_detach(gem->dma_buf, gem->import_attach); + dma_buf_detach(gem->import_attach->dmabuf, gem->import_attach); } } diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c index d6c4dd1194a0..3a20c788c51f 100644 --- a/drivers/gpu/drm/tests/drm_exec_test.c +++ b/drivers/gpu/drm/tests/drm_exec_test.c @@ -150,14 +150,22 @@ static void test_prepare(struct kunit *test) static void test_prepare_array(struct kunit *test) { struct drm_exec_priv *priv = test->priv; - struct drm_gem_object gobj1 = { }; - struct drm_gem_object gobj2 = { }; - struct drm_gem_object *array[] = { &gobj1, &gobj2 }; + struct drm_gem_object *gobj1; + struct drm_gem_object *gobj2; + struct drm_gem_object *array[] = { + (gobj1 = kunit_kzalloc(test, sizeof(*gobj1), GFP_KERNEL)), + (gobj2 = kunit_kzalloc(test, sizeof(*gobj2), GFP_KERNEL)), + }; struct drm_exec exec; int ret; - drm_gem_private_object_init(priv->drm, &gobj1, PAGE_SIZE); - drm_gem_private_object_init(priv->drm, &gobj2, PAGE_SIZE); + if (!gobj1 || !gobj2) { + KUNIT_FAIL(test, "Failed to allocate GEM objects.\n"); + return; + } + + drm_gem_private_object_init(priv->drm, gobj1, PAGE_SIZE); + drm_gem_private_object_init(priv->drm, gobj2, PAGE_SIZE); drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) @@ -166,8 +174,8 @@ static void test_prepare_array(struct kunit *test) KUNIT_EXPECT_EQ(test, ret, 0); drm_exec_fini(&exec); - drm_gem_private_object_fini(&gobj1); - drm_gem_private_object_fini(&gobj2); + drm_gem_private_object_fini(gobj1); + drm_gem_private_object_fini(gobj2); } static void test_multiple_loops(struct kunit *test) diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index 7299fa8971ce..981dada8f3a8 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -1033,13 +1033,14 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) NULL : &result->dst_pitch; drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, 
¶ms->clip, &fmtcnv_state); - buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32)); + buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ memset(buf, 0, dst_size); drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); + buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c index 3f6cff2ab1b2..7c8c15a5c39b 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.c +++ b/drivers/gpu/drm/tidss/tidss_dispc.c @@ -4,6 +4,7 @@ * Author: Jyri Sarha <jsarha@ti.com> */ +#include <linux/bitfield.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/dma-mapping.h> @@ -594,79 +595,53 @@ void tidss_disable_oldi(struct tidss_device *tidss, u32 hw_videoport) * number. For example 7:0 */ -static u32 FLD_MASK(u32 start, u32 end) -{ - return ((1 << (start - end + 1)) - 1) << end; -} - -static u32 FLD_VAL(u32 val, u32 start, u32 end) -{ - return (val << end) & FLD_MASK(start, end); -} - -static u32 FLD_GET(u32 val, u32 start, u32 end) -{ - return (val & FLD_MASK(start, end)) >> end; -} - -static u32 FLD_MOD(u32 orig, u32 val, u32 start, u32 end) -{ - return (orig & ~FLD_MASK(start, end)) | FLD_VAL(val, start, end); -} - -static u32 REG_GET(struct dispc_device *dispc, u32 idx, u32 start, u32 end) -{ - return FLD_GET(dispc_read(dispc, idx), start, end); -} - -static void REG_FLD_MOD(struct dispc_device *dispc, u32 idx, u32 val, - u32 start, u32 end) -{ - dispc_write(dispc, idx, FLD_MOD(dispc_read(dispc, idx), val, - start, end)); -} - -static u32 VID_REG_GET(struct dispc_device *dispc, u32 hw_plane, u32 idx, - u32 start, u32 end) -{ - return FLD_GET(dispc_vid_read(dispc, hw_plane, idx), start, end); -} - -static void VID_REG_FLD_MOD(struct dispc_device *dispc, u32 hw_plane, u32 idx, - u32 val, u32 start, u32 end) -{ - dispc_vid_write(dispc, hw_plane, idx, - FLD_MOD(dispc_vid_read(dispc, hw_plane, idx), - val, start, end)); -} - -static u32 VP_REG_GET(struct dispc_device *dispc, u32 vp, u32 idx, - u32 start, u32 end) -{ - return FLD_GET(dispc_vp_read(dispc, vp, idx), start, end); -} - -static void VP_REG_FLD_MOD(struct dispc_device *dispc, u32 vp, u32 idx, u32 val, - u32 start, u32 end) -{ - dispc_vp_write(dispc, vp, idx, FLD_MOD(dispc_vp_read(dispc, vp, idx), - val, start, end)); -} - -__maybe_unused -static u32 OVR_REG_GET(struct dispc_device *dispc, u32 ovr, u32 idx, - u32 start, u32 end) -{ - return FLD_GET(dispc_ovr_read(dispc, ovr, idx), start, end); -} - -static void OVR_REG_FLD_MOD(struct dispc_device *dispc, u32 ovr, u32 idx, - u32 val, u32 start, u32 end) -{ - dispc_ovr_write(dispc, ovr, idx, - FLD_MOD(dispc_ovr_read(dispc, ovr, idx), - val, start, end)); -} +#define REG_GET(dispc, idx, mask) \ + ((u32)FIELD_GET((mask), dispc_read((dispc), (idx)))) + +#define REG_FLD_MOD(dispc, idx, val, mask) \ + ({ \ + struct dispc_device *_dispc = (dispc); \ + u32 _idx = (idx); \ + u32 _reg = dispc_read(_dispc, _idx); \ + FIELD_MODIFY((mask), &_reg, (val)); \ + dispc_write(_dispc, _idx, _reg); \ + }) + +#define VID_REG_GET(dispc, hw_plane, idx, mask) \ + ((u32)FIELD_GET((mask), dispc_vid_read((dispc), (hw_plane), (idx)))) + +#define VID_REG_FLD_MOD(dispc, hw_plane, idx, val, mask) \ + ({ \ + struct dispc_device *_dispc = (dispc); \ + u32 _hw_plane = (hw_plane); \ + u32 
_idx = (idx); \ + u32 _reg = dispc_vid_read(_dispc, _hw_plane, _idx); \ + FIELD_MODIFY((mask), &_reg, (val)); \ + dispc_vid_write(_dispc, _hw_plane, _idx, _reg); \ + }) + +#define VP_REG_GET(dispc, vp, idx, mask) \ + ((u32)FIELD_GET((mask), dispc_vp_read((dispc), (vp), (idx)))) + +#define VP_REG_FLD_MOD(dispc, vp, idx, val, mask) \ + ({ \ + struct dispc_device *_dispc = (dispc); \ + u32 _vp = (vp); \ + u32 _idx = (idx); \ + u32 _reg = dispc_vp_read(_dispc, _vp, _idx); \ + FIELD_MODIFY((mask), &_reg, (val)); \ + dispc_vp_write(_dispc, _vp, _idx, _reg); \ + }) + +#define OVR_REG_FLD_MOD(dispc, ovr, idx, val, mask) \ + ({ \ + struct dispc_device *_dispc = (dispc); \ + u32 _ovr = (ovr); \ + u32 _idx = (idx); \ + u32 _reg = dispc_ovr_read(_dispc, _ovr, _idx); \ + FIELD_MODIFY((mask), &_reg, (val)); \ + dispc_ovr_write(_dispc, _ovr, _idx, _reg); \ + }) static dispc_irq_t dispc_vp_irq_from_raw(u32 stat, u32 hw_videoport) { @@ -1139,7 +1114,8 @@ static void dispc_set_num_datalines(struct dispc_device *dispc, v = 3; } - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, v, 10, 8); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, v, + DISPC_VP_CONTROL_DATALINES_MASK); } static void dispc_enable_am65x_oldi(struct dispc_device *dispc, u32 hw_videoport, @@ -1162,7 +1138,8 @@ static void dispc_enable_am65x_oldi(struct dispc_device *dispc, u32 hw_videoport oldi_cfg |= BIT(7); /* DEPOL */ - oldi_cfg = FLD_MOD(oldi_cfg, fmt->am65x_oldi_mode_reg_val, 3, 1); + FIELD_MODIFY(DISPC_VP_DSS_OLDI_CFG_MAP_MASK, &oldi_cfg, + fmt->am65x_oldi_mode_reg_val); oldi_cfg |= BIT(12); /* SOFTRST */ @@ -1224,14 +1201,14 @@ void dispc_vp_enable(struct dispc_device *dispc, u32 hw_videoport, vbp = mode->crtc_vtotal - mode->crtc_vsync_end; dispc_vp_write(dispc, hw_videoport, DISPC_VP_TIMING_H, - FLD_VAL(hsw - 1, 7, 0) | - FLD_VAL(hfp - 1, 19, 8) | - FLD_VAL(hbp - 1, 31, 20)); + FIELD_PREP(DISPC_VP_TIMING_H_SYNC_PULSE_MASK, hsw - 1) | + FIELD_PREP(DISPC_VP_TIMING_H_FRONT_PORCH_MASK, hfp - 1) | + FIELD_PREP(DISPC_VP_TIMING_H_BACK_PORCH_MASK, hbp - 1)); dispc_vp_write(dispc, hw_videoport, DISPC_VP_TIMING_V, - FLD_VAL(vsw - 1, 7, 0) | - FLD_VAL(vfp, 19, 8) | - FLD_VAL(vbp, 31, 20)); + FIELD_PREP(DISPC_VP_TIMING_V_SYNC_PULSE_MASK, vsw - 1) | + FIELD_PREP(DISPC_VP_TIMING_V_FRONT_PORCH_MASK, vfp) | + FIELD_PREP(DISPC_VP_TIMING_V_BACK_PORCH_MASK, vbp)); ivs = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); @@ -1254,24 +1231,28 @@ void dispc_vp_enable(struct dispc_device *dispc, u32 hw_videoport, ieo = false; dispc_vp_write(dispc, hw_videoport, DISPC_VP_POL_FREQ, - FLD_VAL(align, 18, 18) | - FLD_VAL(onoff, 17, 17) | - FLD_VAL(rf, 16, 16) | - FLD_VAL(ieo, 15, 15) | - FLD_VAL(ipc, 14, 14) | - FLD_VAL(ihs, 13, 13) | - FLD_VAL(ivs, 12, 12)); + FIELD_PREP(DISPC_VP_POL_FREQ_ALIGN_MASK, align) | + FIELD_PREP(DISPC_VP_POL_FREQ_ONOFF_MASK, onoff) | + FIELD_PREP(DISPC_VP_POL_FREQ_RF_MASK, rf) | + FIELD_PREP(DISPC_VP_POL_FREQ_IEO_MASK, ieo) | + FIELD_PREP(DISPC_VP_POL_FREQ_IPC_MASK, ipc) | + FIELD_PREP(DISPC_VP_POL_FREQ_IHS_MASK, ihs) | + FIELD_PREP(DISPC_VP_POL_FREQ_IVS_MASK, ivs)); dispc_vp_write(dispc, hw_videoport, DISPC_VP_SIZE_SCREEN, - FLD_VAL(mode->crtc_hdisplay - 1, 11, 0) | - FLD_VAL(mode->crtc_vdisplay - 1, 27, 16)); + FIELD_PREP(DISPC_VP_SIZE_SCREEN_HDISPLAY_MASK, + mode->crtc_hdisplay - 1) | + FIELD_PREP(DISPC_VP_SIZE_SCREEN_VDISPLAY_MASK, + mode->crtc_vdisplay - 1)); - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 1, 0, 0); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 1, + DISPC_VP_CONTROL_ENABLE_MASK); } void 
dispc_vp_disable(struct dispc_device *dispc, u32 hw_videoport) { - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 0, 0, 0); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 0, + DISPC_VP_CONTROL_ENABLE_MASK); } void dispc_vp_unprepare(struct dispc_device *dispc, u32 hw_videoport) @@ -1285,13 +1266,16 @@ void dispc_vp_unprepare(struct dispc_device *dispc, u32 hw_videoport) bool dispc_vp_go_busy(struct dispc_device *dispc, u32 hw_videoport) { - return VP_REG_GET(dispc, hw_videoport, DISPC_VP_CONTROL, 5, 5); + return VP_REG_GET(dispc, hw_videoport, DISPC_VP_CONTROL, + DISPC_VP_CONTROL_GOBIT_MASK); } void dispc_vp_go(struct dispc_device *dispc, u32 hw_videoport) { - WARN_ON(VP_REG_GET(dispc, hw_videoport, DISPC_VP_CONTROL, 5, 5)); - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 1, 5, 5); + WARN_ON(VP_REG_GET(dispc, hw_videoport, DISPC_VP_CONTROL, + DISPC_VP_CONTROL_GOBIT_MASK)); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 1, + DISPC_VP_CONTROL_GOBIT_MASK); } enum c8_to_c12_mode { C8_TO_C12_REPLICATE, C8_TO_C12_MAX, C8_TO_C12_MIN }; @@ -1491,11 +1475,11 @@ static void dispc_am65x_ovr_set_plane(struct dispc_device *dispc, u32 hw_id = dispc->feat->vid_info[hw_plane].hw_id; OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - hw_id, 4, 1); - OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - x, 17, 6); - OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - y, 30, 19); + hw_id, DISPC_OVR_ATTRIBUTES_CHANNELIN_MASK); + OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), x, + DISPC_OVR_ATTRIBUTES_POSX_MASK); + OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), y, + DISPC_OVR_ATTRIBUTES_POSY_MASK); } static void dispc_j721e_ovr_set_plane(struct dispc_device *dispc, @@ -1505,11 +1489,11 @@ static void dispc_j721e_ovr_set_plane(struct dispc_device *dispc, u32 hw_id = dispc->feat->vid_info[hw_plane].hw_id; OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - hw_id, 4, 1); - OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES2(layer), - x, 13, 0); - OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES2(layer), - y, 29, 16); + hw_id, DISPC_OVR_ATTRIBUTES_CHANNELIN_MASK); + OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES2(layer), x, + DISPC_OVR_ATTRIBUTES2_POSX_MASK); + OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES2(layer), y, + DISPC_OVR_ATTRIBUTES2_POSY_MASK); } void dispc_ovr_set_plane(struct dispc_device *dispc, u32 hw_plane, @@ -1544,7 +1528,7 @@ void dispc_ovr_enable_layer(struct dispc_device *dispc, return; OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - !!enable, 0, 0); + !!enable, DISPC_OVR_ATTRIBUTES_ENABLE_MASK); } /* CSC */ @@ -1580,14 +1564,14 @@ struct dispc_csc_coef { static void dispc_csc_offset_regval(const struct dispc_csc_coef *csc, u32 *regval) { -#define OVAL(x, y) (FLD_VAL(x, 15, 3) | FLD_VAL(y, 31, 19)) +#define OVAL(x, y) (FIELD_PREP(GENMASK(15, 3), x) | FIELD_PREP(GENMASK(31, 19), y)) regval[5] = OVAL(csc->preoffset[0], csc->preoffset[1]); regval[6] = OVAL(csc->preoffset[2], csc->postoffset[0]); regval[7] = OVAL(csc->postoffset[1], csc->postoffset[2]); #undef OVAL } -#define CVAL(x, y) (FLD_VAL(x, 10, 0) | FLD_VAL(y, 26, 16)) +#define CVAL(x, y) (FIELD_PREP(GENMASK(10, 0), x) | FIELD_PREP(GENMASK(26, 16), y)) static void dispc_csc_yuv2rgb_regval(const struct dispc_csc_coef *csc, u32 *regval) { @@ -1767,7 +1751,8 @@ static void dispc_vid_csc_setup(struct dispc_device *dispc, u32 hw_plane, static void 
dispc_vid_csc_enable(struct dispc_device *dispc, u32 hw_plane, bool enable) { - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, !!enable, 9, 9); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, !!enable, + DISPC_VID_ATTRIBUTES_COLORCONVENABLE_MASK); } /* SCALER */ @@ -1826,7 +1811,8 @@ static void dispc_vid_write_fir_coefs(struct dispc_device *dispc, c1 = coefs->c1[phase]; c2 = coefs->c2[phase]; - c12 = FLD_VAL(c1, 19, 10) | FLD_VAL(c2, 29, 20); + c12 = FIELD_PREP(GENMASK(19, 10), c1) | FIELD_PREP(GENMASK(29, 20), + c2); dispc_vid_write(dispc, hw_plane, reg, c12); } @@ -2023,20 +2009,20 @@ static void dispc_vid_set_scaling(struct dispc_device *dispc, u32 fourcc) { /* HORIZONTAL RESIZE ENABLE */ - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, - sp->scale_x, 7, 7); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, sp->scale_x, + DISPC_VID_ATTRIBUTES_HRESIZEENABLE_MASK); /* VERTICAL RESIZE ENABLE */ - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, - sp->scale_y, 8, 8); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, sp->scale_y, + DISPC_VID_ATTRIBUTES_VRESIZEENABLE_MASK); /* Skip the rest if no scaling is used */ if (!sp->scale_x && !sp->scale_y) return; /* VERTICAL 5-TAPS */ - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, - sp->five_taps, 21, 21); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, sp->five_taps, + DISPC_VID_ATTRIBUTES_VERTICALTAPS_MASK); if (dispc_fourcc_is_yuv(fourcc)) { if (sp->scale_x) { @@ -2126,7 +2112,7 @@ static void dispc_plane_set_pixel_format(struct dispc_device *dispc, if (dispc_color_formats[i].fourcc == fourcc) { VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, dispc_color_formats[i].dss_code, - 6, 1); + DISPC_VID_ATTRIBUTES_FORMAT_MASK); return; } } @@ -2248,7 +2234,8 @@ void dispc_plane_setup(struct dispc_device *dispc, u32 hw_plane, dispc_vid_write(dispc, hw_plane, DISPC_VID_BA_EXT_1, (u64)dma_addr >> 32); dispc_vid_write(dispc, hw_plane, DISPC_VID_PICTURE_SIZE, - (scale.in_w - 1) | ((scale.in_h - 1) << 16)); + FIELD_PREP(DISPC_VID_PICTURE_SIZE_MEMSIZEY_MASK, scale.in_h - 1) | + FIELD_PREP(DISPC_VID_PICTURE_SIZE_MEMSIZEX_MASK, scale.in_w - 1)); /* For YUV422 format we use the macropixel size for pixel inc */ if (fourcc == DRM_FORMAT_YUYV || fourcc == DRM_FORMAT_UYVY) @@ -2285,8 +2272,10 @@ void dispc_plane_setup(struct dispc_device *dispc, u32 hw_plane, if (!lite) { dispc_vid_write(dispc, hw_plane, DISPC_VID_SIZE, - (state->crtc_w - 1) | - ((state->crtc_h - 1) << 16)); + FIELD_PREP(DISPC_VID_SIZE_SIZEY_MASK, + state->crtc_h - 1) | + FIELD_PREP(DISPC_VID_SIZE_SIZEX_MASK, + state->crtc_w - 1)); dispc_vid_set_scaling(dispc, hw_plane, &scale, fourcc); } @@ -2300,38 +2289,45 @@ void dispc_plane_setup(struct dispc_device *dispc, u32 hw_plane, } dispc_vid_write(dispc, hw_plane, DISPC_VID_GLOBAL_ALPHA, - 0xFF & (state->alpha >> 8)); + FIELD_PREP(DISPC_VID_GLOBAL_ALPHA_GLOBALALPHA_MASK, + state->alpha >> 8)); if (state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, 1, - 28, 28); + DISPC_VID_ATTRIBUTES_PREMULTIPLYALPHA_MASK); else VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, 0, - 28, 28); + DISPC_VID_ATTRIBUTES_PREMULTIPLYALPHA_MASK); } void dispc_plane_enable(struct dispc_device *dispc, u32 hw_plane, bool enable) { - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, !!enable, 0, 0); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, !!enable, + DISPC_VID_ATTRIBUTES_ENABLE_MASK); } static u32 dispc_vid_get_fifo_size(struct 
dispc_device *dispc, u32 hw_plane) { - return VID_REG_GET(dispc, hw_plane, DISPC_VID_BUF_SIZE_STATUS, 15, 0); + return VID_REG_GET(dispc, hw_plane, DISPC_VID_BUF_SIZE_STATUS, + DISPC_VID_BUF_SIZE_STATUS_BUFSIZE_MASK); } static void dispc_vid_set_mflag_threshold(struct dispc_device *dispc, u32 hw_plane, u32 low, u32 high) { dispc_vid_write(dispc, hw_plane, DISPC_VID_MFLAG_THRESHOLD, - FLD_VAL(high, 31, 16) | FLD_VAL(low, 15, 0)); + FIELD_PREP(DISPC_VID_MFLAG_THRESHOLD_HT_MFLAG_MASK, high) | + FIELD_PREP(DISPC_VID_MFLAG_THRESHOLD_LT_MFLAG_MASK, low)); } static void dispc_vid_set_buf_threshold(struct dispc_device *dispc, u32 hw_plane, u32 low, u32 high) { dispc_vid_write(dispc, hw_plane, DISPC_VID_BUF_THRESHOLD, - FLD_VAL(high, 31, 16) | FLD_VAL(low, 15, 0)); + FIELD_PREP(DISPC_VID_BUF_THRESHOLD_BUFHIGHTHRESHOLD_MASK, + high) | + FIELD_PREP(DISPC_VID_BUF_THRESHOLD_BUFLOWTHRESHOLD_MASK, + low)); } static void dispc_k2g_plane_init(struct dispc_device *dispc) @@ -2341,9 +2337,11 @@ static void dispc_k2g_plane_init(struct dispc_device *dispc) dev_dbg(dispc->dev, "%s()\n", __func__); /* MFLAG_CTRL = ENABLED */ - REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 2, 1, 0); + REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 2, + DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_CTRL_MASK); /* MFLAG_START = MFLAGNORMALSTARTMODE */ - REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 0, 6, 6); + REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 0, + DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_START_MASK); for (hw_plane = 0; hw_plane < dispc->feat->num_vids; hw_plane++) { u32 size = dispc_vid_get_fifo_size(dispc, hw_plane); @@ -2380,7 +2378,7 @@ static void dispc_k2g_plane_init(struct dispc_device *dispc) * register is ignored. */ VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, 1, - 19, 19); + DISPC_VID_ATTRIBUTES_BUFPRELOAD_MASK); } } @@ -2392,13 +2390,15 @@ static void dispc_k3_plane_init(struct dispc_device *dispc) dev_dbg(dispc->dev, "%s()\n", __func__); - REG_FLD_MOD(dispc, DSS_CBA_CFG, cba_lo_pri, 2, 0); - REG_FLD_MOD(dispc, DSS_CBA_CFG, cba_hi_pri, 5, 3); + REG_FLD_MOD(dispc, DSS_CBA_CFG, cba_lo_pri, DSS_CBA_CFG_PRI_LO_MASK); + REG_FLD_MOD(dispc, DSS_CBA_CFG, cba_hi_pri, DSS_CBA_CFG_PRI_HI_MASK); /* MFLAG_CTRL = ENABLED */ - REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 2, 1, 0); + REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 2, + DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_CTRL_MASK); /* MFLAG_START = MFLAGNORMALSTARTMODE */ - REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 0, 6, 6); + REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 0, + DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_START_MASK); for (hw_plane = 0; hw_plane < dispc->feat->num_vids; hw_plane++) { u32 size = dispc_vid_get_fifo_size(dispc, hw_plane); @@ -2431,7 +2431,7 @@ static void dispc_k3_plane_init(struct dispc_device *dispc) /* Prefech up to PRELOAD value */ VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, 0, - 19, 19); + DISPC_VID_ATTRIBUTES_BUFPRELOAD_MASK); } } @@ -2461,7 +2461,8 @@ static void dispc_vp_init(struct dispc_device *dispc) /* Enable the gamma Shadow bit-field for all VPs*/ for (i = 0; i < dispc->feat->num_vps; i++) - VP_REG_FLD_MOD(dispc, i, DISPC_VP_CONFIG, 1, 2, 2); + VP_REG_FLD_MOD(dispc, i, DISPC_VP_CONFIG, 1, + DISPC_VP_CONFIG_GAMMAENABLE_MASK); } static void dispc_initial_config(struct dispc_device *dispc) @@ -2472,8 +2473,8 @@ static void dispc_initial_config(struct dispc_device *dispc) /* Note: Hardcoded DPI routing on J721E for now */ if (dispc->feat->subrev == DISPC_J721E) { dispc_write(dispc, DISPC_CONNECTIONS, - FLD_VAL(2, 
3, 0) | /* VP1 to DPI0 */ - FLD_VAL(8, 7, 4) /* VP3 to DPI1 */ + FIELD_PREP(DISPC_CONNECTIONS_DPI_0_CONN_MASK, 2) | /* VP1 to DPI0 */ + FIELD_PREP(DISPC_CONNECTIONS_DPI_1_CONN_MASK, 8) /* VP3 to DPI1 */ ); } } @@ -2651,8 +2652,8 @@ static void dispc_k2g_cpr_from_ctm(const struct drm_color_ctm *ctm, cpr->m[CSC_BB] = dispc_S31_32_to_s2_8(ctm->matrix[8]); } -#define CVAL(xR, xG, xB) (FLD_VAL(xR, 9, 0) | FLD_VAL(xG, 20, 11) | \ - FLD_VAL(xB, 31, 22)) +#define CVAL(xR, xG, xB) (FIELD_PREP(GENMASK(9, 0), xR) | FIELD_PREP(GENMASK(20, 11), xG) | \ + FIELD_PREP(GENMASK(31, 22), xB)) static void dispc_k2g_vp_csc_cpr_regval(const struct dispc_csc_coef *csc, u32 *regval) @@ -2694,8 +2695,8 @@ static void dispc_k2g_vp_set_ctm(struct dispc_device *dispc, u32 hw_videoport, cprenable = 1; } - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONFIG, - cprenable, 15, 15); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONFIG, cprenable, + DISPC_VP_CONFIG_CPR_MASK); } static s16 dispc_S31_32_to_s3_8(s64 coef) @@ -2760,8 +2761,8 @@ static void dispc_k3_vp_set_ctm(struct dispc_device *dispc, u32 hw_videoport, colorconvenable = 1; } - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONFIG, - colorconvenable, 24, 24); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONFIG, colorconvenable, + DISPC_VP_CONFIG_COLORCONVENABLE_MASK); } static void dispc_vp_set_color_mgmt(struct dispc_device *dispc, @@ -2816,26 +2817,26 @@ int dispc_runtime_resume(struct dispc_device *dispc) clk_prepare_enable(dispc->fclk); - if (REG_GET(dispc, DSS_SYSSTATUS, 0, 0) == 0) + if (REG_GET(dispc, DSS_SYSSTATUS, DSS_SYSSTATUS_DISPC_FUNC_RESETDONE) == 0) dev_warn(dispc->dev, "DSS FUNC RESET not done!\n"); dev_dbg(dispc->dev, "OMAP DSS7 rev 0x%x\n", dispc_read(dispc, DSS_REVISION)); dev_dbg(dispc->dev, "VP RESETDONE %d,%d,%d\n", - REG_GET(dispc, DSS_SYSSTATUS, 1, 1), - REG_GET(dispc, DSS_SYSSTATUS, 2, 2), - REG_GET(dispc, DSS_SYSSTATUS, 3, 3)); + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(1, 1)), + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(2, 2)), + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(3, 3))); if (dispc->feat->subrev == DISPC_AM625 || dispc->feat->subrev == DISPC_AM65X) dev_dbg(dispc->dev, "OLDI RESETDONE %d,%d,%d\n", - REG_GET(dispc, DSS_SYSSTATUS, 5, 5), - REG_GET(dispc, DSS_SYSSTATUS, 6, 6), - REG_GET(dispc, DSS_SYSSTATUS, 7, 7)); + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(5, 5)), + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(6, 6)), + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(7, 7))); dev_dbg(dispc->dev, "DISPC IDLE %d\n", - REG_GET(dispc, DSS_SYSSTATUS, 9, 9)); + REG_GET(dispc, DSS_SYSSTATUS, DSS_SYSSTATUS_DISPC_IDLE_STATUS)); dispc_initial_config(dispc); @@ -2912,7 +2913,8 @@ static void dispc_softreset_k2g(struct dispc_device *dispc) spin_unlock_irqrestore(&dispc->tidss->irq_lock, flags); for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx) - VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0); + VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, + DISPC_VP_CONTROL_ENABLE_MASK); } static int dispc_softreset(struct dispc_device *dispc) @@ -2926,7 +2928,7 @@ static int dispc_softreset(struct dispc_device *dispc) } /* Soft reset */ - REG_FLD_MOD(dispc, DSS_SYSCONFIG, 1, 1, 1); + REG_FLD_MOD(dispc, DSS_SYSCONFIG, 1, DSS_SYSCONFIG_SOFTRESET_MASK); /* Wait for reset to complete */ ret = readl_poll_timeout(dispc->base_common + DSS_SYSSTATUS, val, val & 1, 100, 5000); diff --git a/drivers/gpu/drm/tidss/tidss_dispc_regs.h b/drivers/gpu/drm/tidss/tidss_dispc_regs.h index 50a3f28250ef..382027dddce8 100644 --- 
a/drivers/gpu/drm/tidss/tidss_dispc_regs.h +++ b/drivers/gpu/drm/tidss/tidss_dispc_regs.h @@ -56,7 +56,12 @@ enum dispc_common_regs { #define DSS_REVISION REG(DSS_REVISION) #define DSS_SYSCONFIG REG(DSS_SYSCONFIG) +#define DSS_SYSCONFIG_SOFTRESET_MASK GENMASK(1, 1) + #define DSS_SYSSTATUS REG(DSS_SYSSTATUS) +#define DSS_SYSSTATUS_DISPC_IDLE_STATUS GENMASK(9, 9) +#define DSS_SYSSTATUS_DISPC_FUNC_RESETDONE GENMASK(0, 0) + #define DISPC_IRQ_EOI REG(DISPC_IRQ_EOI) #define DISPC_IRQSTATUS_RAW REG(DISPC_IRQSTATUS_RAW) #define DISPC_IRQSTATUS REG(DISPC_IRQSTATUS) @@ -70,9 +75,15 @@ enum dispc_common_regs { #define WB_IRQSTATUS REG(WB_IRQSTATUS) #define DISPC_GLOBAL_MFLAG_ATTRIBUTE REG(DISPC_GLOBAL_MFLAG_ATTRIBUTE) +#define DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_START_MASK GENMASK(6, 6) +#define DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_CTRL_MASK GENMASK(1, 0) + #define DISPC_GLOBAL_OUTPUT_ENABLE REG(DISPC_GLOBAL_OUTPUT_ENABLE) #define DISPC_GLOBAL_BUFFER REG(DISPC_GLOBAL_BUFFER) #define DSS_CBA_CFG REG(DSS_CBA_CFG) +#define DSS_CBA_CFG_PRI_HI_MASK GENMASK(5, 3) +#define DSS_CBA_CFG_PRI_LO_MASK GENMASK(2, 0) + #define DISPC_DBG_CONTROL REG(DISPC_DBG_CONTROL) #define DISPC_DBG_STATUS REG(DISPC_DBG_STATUS) #define DISPC_CLKGATING_DISABLE REG(DISPC_CLKGATING_DISABLE) @@ -88,6 +99,9 @@ enum dispc_common_regs { #define FBDC_CONSTANT_COLOR_0 REG(FBDC_CONSTANT_COLOR_0) #define FBDC_CONSTANT_COLOR_1 REG(FBDC_CONSTANT_COLOR_1) #define DISPC_CONNECTIONS REG(DISPC_CONNECTIONS) +#define DISPC_CONNECTIONS_DPI_1_CONN_MASK GENMASK(7, 4) +#define DISPC_CONNECTIONS_DPI_0_CONN_MASK GENMASK(3, 0) + #define DISPC_MSS_VP1 REG(DISPC_MSS_VP1) #define DISPC_MSS_VP3 REG(DISPC_MSS_VP3) @@ -102,13 +116,27 @@ enum dispc_common_regs { #define DISPC_VID_ACCUV2_0 0x18 #define DISPC_VID_ACCUV2_1 0x1c #define DISPC_VID_ATTRIBUTES 0x20 +#define DISPC_VID_ATTRIBUTES_PREMULTIPLYALPHA_MASK GENMASK(28, 28) +#define DISPC_VID_ATTRIBUTES_VERTICALTAPS_MASK GENMASK(21, 21) +#define DISPC_VID_ATTRIBUTES_BUFPRELOAD_MASK GENMASK(19, 19) +#define DISPC_VID_ATTRIBUTES_COLORCONVENABLE_MASK GENMASK(9, 9) +#define DISPC_VID_ATTRIBUTES_VRESIZEENABLE_MASK GENMASK(8, 8) +#define DISPC_VID_ATTRIBUTES_HRESIZEENABLE_MASK GENMASK(7, 7) +#define DISPC_VID_ATTRIBUTES_FORMAT_MASK GENMASK(6, 1) +#define DISPC_VID_ATTRIBUTES_ENABLE_MASK GENMASK(0, 0) + #define DISPC_VID_ATTRIBUTES2 0x24 #define DISPC_VID_BA_0 0x28 #define DISPC_VID_BA_1 0x2c #define DISPC_VID_BA_UV_0 0x30 #define DISPC_VID_BA_UV_1 0x34 #define DISPC_VID_BUF_SIZE_STATUS 0x38 +#define DISPC_VID_BUF_SIZE_STATUS_BUFSIZE_MASK GENMASK(15, 0) + #define DISPC_VID_BUF_THRESHOLD 0x3c +#define DISPC_VID_BUF_THRESHOLD_BUFHIGHTHRESHOLD_MASK GENMASK(31, 16) +#define DISPC_VID_BUF_THRESHOLD_BUFLOWTHRESHOLD_MASK GENMASK(15, 0) + #define DISPC_VID_CSC_COEF(n) (0x40 + (n) * 4) #define DISPC_VID_FIRH 0x5c @@ -137,15 +165,26 @@ enum dispc_common_regs { #define DISPC_VID_FIR_COEF_V12_C(phase) (0x1bc + (phase) * 4) #define DISPC_VID_GLOBAL_ALPHA 0x1fc +#define DISPC_VID_GLOBAL_ALPHA_GLOBALALPHA_MASK GENMASK(7, 0) + #define DISPC_VID_K2G_IRQENABLE 0x200 /* K2G */ #define DISPC_VID_K2G_IRQSTATUS 0x204 /* K2G */ #define DISPC_VID_MFLAG_THRESHOLD 0x208 +#define DISPC_VID_MFLAG_THRESHOLD_HT_MFLAG_MASK GENMASK(31, 16) +#define DISPC_VID_MFLAG_THRESHOLD_LT_MFLAG_MASK GENMASK(15, 0) + #define DISPC_VID_PICTURE_SIZE 0x20c +#define DISPC_VID_PICTURE_SIZE_MEMSIZEY_MASK GENMASK(27, 16) +#define DISPC_VID_PICTURE_SIZE_MEMSIZEX_MASK GENMASK(11, 0) + #define DISPC_VID_PIXEL_INC 0x210 #define DISPC_VID_K2G_POSITION 0x214 /* K2G */ #define 
DISPC_VID_PRELOAD 0x218 #define DISPC_VID_ROW_INC 0x21c #define DISPC_VID_SIZE 0x220 +#define DISPC_VID_SIZE_SIZEY_MASK GENMASK(27, 16) +#define DISPC_VID_SIZE_SIZEX_MASK GENMASK(11, 0) + #define DISPC_VID_BA_EXT_0 0x22c #define DISPC_VID_BA_EXT_1 0x230 #define DISPC_VID_BA_UV_EXT_0 0x234 @@ -173,11 +212,27 @@ enum dispc_common_regs { #define DISPC_OVR_TRANS_COLOR_MIN 0x18 #define DISPC_OVR_TRANS_COLOR_MIN2 0x1c #define DISPC_OVR_ATTRIBUTES(n) (0x20 + (n) * 4) +#define DISPC_OVR_ATTRIBUTES_POSY_MASK GENMASK(30, 19) +#define DISPC_OVR_ATTRIBUTES_POSX_MASK GENMASK(17, 6) +#define DISPC_OVR_ATTRIBUTES_CHANNELIN_MASK GENMASK(4, 1) +#define DISPC_OVR_ATTRIBUTES_ENABLE_MASK GENMASK(0, 0) + #define DISPC_OVR_ATTRIBUTES2(n) (0x34 + (n) * 4) /* J721E */ +#define DISPC_OVR_ATTRIBUTES2_POSY_MASK GENMASK(29, 16) +#define DISPC_OVR_ATTRIBUTES2_POSX_MASK GENMASK(13, 0) + /* VP */ #define DISPC_VP_CONFIG 0x0 +#define DISPC_VP_CONFIG_COLORCONVENABLE_MASK GENMASK(24, 24) +#define DISPC_VP_CONFIG_CPR_MASK GENMASK(15, 15) +#define DISPC_VP_CONFIG_GAMMAENABLE_MASK GENMASK(2, 2) + #define DISPC_VP_CONTROL 0x4 +#define DISPC_VP_CONTROL_DATALINES_MASK GENMASK(10, 8) +#define DISPC_VP_CONTROL_GOBIT_MASK GENMASK(5, 5) +#define DISPC_VP_CONTROL_ENABLE_MASK GENMASK(0, 0) + #define DISPC_VP_CSC_COEF0 0x8 #define DISPC_VP_CSC_COEF1 0xc #define DISPC_VP_CSC_COEF2 0x10 @@ -189,9 +244,28 @@ enum dispc_common_regs { #define DISPC_VP_DATA_CYCLE_2 0x1c #define DISPC_VP_LINE_NUMBER 0x44 #define DISPC_VP_POL_FREQ 0x4c +#define DISPC_VP_POL_FREQ_ALIGN_MASK GENMASK(18, 18) +#define DISPC_VP_POL_FREQ_ONOFF_MASK GENMASK(17, 17) +#define DISPC_VP_POL_FREQ_RF_MASK GENMASK(16, 16) +#define DISPC_VP_POL_FREQ_IEO_MASK GENMASK(15, 15) +#define DISPC_VP_POL_FREQ_IPC_MASK GENMASK(14, 14) +#define DISPC_VP_POL_FREQ_IHS_MASK GENMASK(13, 13) +#define DISPC_VP_POL_FREQ_IVS_MASK GENMASK(12, 12) + #define DISPC_VP_SIZE_SCREEN 0x50 +#define DISPC_VP_SIZE_SCREEN_HDISPLAY_MASK GENMASK(11, 0) +#define DISPC_VP_SIZE_SCREEN_VDISPLAY_MASK GENMASK(27, 16) + #define DISPC_VP_TIMING_H 0x54 +#define DISPC_VP_TIMING_H_SYNC_PULSE_MASK GENMASK(7, 0) +#define DISPC_VP_TIMING_H_FRONT_PORCH_MASK GENMASK(19, 8) +#define DISPC_VP_TIMING_H_BACK_PORCH_MASK GENMASK(31, 20) + #define DISPC_VP_TIMING_V 0x58 +#define DISPC_VP_TIMING_V_SYNC_PULSE_MASK GENMASK(7, 0) +#define DISPC_VP_TIMING_V_FRONT_PORCH_MASK GENMASK(19, 8) +#define DISPC_VP_TIMING_V_BACK_PORCH_MASK GENMASK(31, 20) + #define DISPC_VP_CSC_COEF3 0x5c #define DISPC_VP_CSC_COEF4 0x60 #define DISPC_VP_CSC_COEF5 0x64 @@ -220,6 +294,8 @@ enum dispc_common_regs { #define DISPC_VP_SAFETY_LFSR_SEED 0x110 #define DISPC_VP_GAMMA_TABLE 0x120 #define DISPC_VP_DSS_OLDI_CFG 0x160 +#define DISPC_VP_DSS_OLDI_CFG_MAP_MASK GENMASK(3, 1) + #define DISPC_VP_DSS_OLDI_STATUS 0x164 #define DISPC_VP_DSS_OLDI_LB 0x168 #define DISPC_VP_DSS_MERGE_SPLIT 0x16c /* J721E */ diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index 06e54694a7f2..94a5bf61a115 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -82,6 +82,21 @@ config DRM_PANEL_MIPI_DBI https://github.com/notro/panel-mipi-dbi/wiki. To compile this driver as a module, choose M here. +config DRM_PIXPAPER + tristate "DRM support for PIXPAPER display panels" + depends on DRM && SPI + select DRM_CLIENT_SELECTION + select DRM_GEM_DMA_HELPER + select DRM_KMS_HELPER + help + DRM driver for the Mayqueen Pixpaper e-ink display panel. 
+ + This driver supports small e-paper displays connected over SPI, + with a resolution of 122x250 and XRGB8888 framebuffer format. + It is intended for low-power embedded applications. + + If M is selected, the module will be built as pixpaper.ko. + config TINYDRM_HX8357D tristate "DRM support for HX8357D display panels" depends on DRM && SPI diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile index 4a9ff61ec254..48d30bf6152f 100644 --- a/drivers/gpu/drm/tiny/Makefile +++ b/drivers/gpu/drm/tiny/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_DRM_BOCHS) += bochs.o obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus-qemu.o obj-$(CONFIG_DRM_GM12U320) += gm12u320.o obj-$(CONFIG_DRM_PANEL_MIPI_DBI) += panel-mipi-dbi.o +obj-$(CONFIG_DRM_PIXPAPER) += pixpaper.o obj-$(CONFIG_TINYDRM_HX8357D) += hx8357d.o obj-$(CONFIG_TINYDRM_ILI9163) += ili9163.o obj-$(CONFIG_TINYDRM_ILI9225) += ili9225.o diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 8d3b7c4fa6a4..d2d5e9f1269f 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -252,7 +252,7 @@ static int bochs_hw_init(struct bochs_device *bochs) } bochs->ioports = 1; } else { - dev_err(dev->dev, "I/O ports are not supported\n"); + drm_err(dev, "I/O ports are not supported\n"); return -EIO; } diff --git a/drivers/gpu/drm/tiny/pixpaper.c b/drivers/gpu/drm/tiny/pixpaper.c new file mode 100644 index 000000000000..b1379cb5f030 --- /dev/null +++ b/drivers/gpu/drm/tiny/pixpaper.c @@ -0,0 +1,1165 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DRM driver for PIXPAPER e-ink panel + * + * Author: LiangCheng Wang <zaq14760@gmail.com>, + */ +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/spi/spi.h> + +#include <drm/clients/drm_client_setup.h> +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_drv.h> +#include <drm/drm_fbdev_shmem.h> +#include <drm/drm_framebuffer.h> +#include <drm/drm_gem_atomic_helper.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_probe_helper.h> + +/* + * Note on Undocumented Commands/Registers: + * + * Several commands and register parameters defined in this header are not + * documented in the datasheet. Their values and usage have been derived + * through analysis of existing userspace example programs. + * + * These 'unknown' definitions are crucial for the proper initialization + * and stable operation of the panel. Modifying these values without + * thorough understanding may lead to display anomalies, panel damage, + * or unexpected behavior. 
+ */ + +/* Command definitions */ +#define PIXPAPER_CMD_PANEL_SETTING 0x00 /* R00H: Panel settings */ +#define PIXPAPER_CMD_POWER_SETTING 0x01 /* R01H: Power settings */ +#define PIXPAPER_CMD_POWER_OFF 0x02 /* R02H: Power off */ +#define PIXPAPER_CMD_POWER_OFF_SEQUENCE 0x03 /* R03H: Power off sequence */ +#define PIXPAPER_CMD_POWER_ON 0x04 /* R04H: Power on */ +#define PIXPAPER_CMD_BOOSTER_SOFT_START 0x06 /* R06H: Booster soft start */ +#define PIXPAPER_CMD_DEEP_SLEEP 0x07 /* R07H: Deep sleep */ +#define PIXPAPER_CMD_DATA_START_TRANSMISSION 0x10 +/* R10H: Data transmission start */ +#define PIXPAPER_CMD_DISPLAY_REFRESH 0x12 /* R12H: Display refresh */ +#define PIXPAPER_CMD_PLL_CONTROL 0x30 /* R30H: PLL control */ +#define PIXPAPER_CMD_TEMP_SENSOR_CALIB 0x41 +/* R41H: Temperature sensor calibration */ +#define PIXPAPER_CMD_UNKNOWN_4D 0x4D /* R4DH: Unknown command */ +#define PIXPAPER_CMD_VCOM_INTERVAL 0x50 /* R50H: VCOM interval */ +#define PIXPAPER_CMD_UNKNOWN_60 0x60 /* R60H: Unknown command */ +#define PIXPAPER_CMD_RESOLUTION_SETTING 0x61 /* R61H: Resolution settings */ +#define PIXPAPER_CMD_GATE_SOURCE_START 0x65 /* R65H: Gate/source start */ +#define PIXPAPER_CMD_UNKNOWN_B4 0xB4 /* RB4H: Unknown command */ +#define PIXPAPER_CMD_UNKNOWN_B5 0xB5 /* RB5H: Unknown command */ +#define PIXPAPER_CMD_UNKNOWN_E0 0xE0 /* RE0H: Unknown command */ +#define PIXPAPER_CMD_POWER_SAVING 0xE3 /* RE3H: Power saving */ +#define PIXPAPER_CMD_UNKNOWN_E7 0xE7 /* RE7H: Unknown command */ +#define PIXPAPER_CMD_UNKNOWN_E9 0xE9 /* RE9H: Unknown command */ + +/* R00H PSR - First Parameter */ +#define PIXPAPER_PSR_RST_N BIT(0) +/* Bit 0: RST_N, 1=no effect (default), 0=reset with booster OFF */ +#define PIXPAPER_PSR_SHD_N BIT(1) +/* Bit 1: SHD_N, 1=booster ON (default), 0=booster OFF */ +#define PIXPAPER_PSR_SHL BIT(2) +/* Bit 2: SHL, 1=shift right (default), 0=shift left */ +#define PIXPAPER_PSR_UD BIT(3) +/* Bit 3: UD, 1=scan up (default), 0=scan down */ +#define PIXPAPER_PSR_PST_MODE BIT(5) +/* Bit 5: PST_MODE, 0=frame scanning (default), 1=external */ +#define PIXPAPER_PSR_RES_MASK (3 << 6) +/* Bits 7-6: RES[1:0], resolution setting */ +#define PIXPAPER_PSR_RES_176x296 (0x0 << 6) /* 00: 176x296 */ +#define PIXPAPER_PSR_RES_128x296 (0x1 << 6) /* 01: 128x296 */ +#define PIXPAPER_PSR_RES_128x250 (0x2 << 6) /* 10: 128x250 */ +#define PIXPAPER_PSR_RES_112x204 (0x3 << 6) /* 11: 112x204 */ +#define PIXPAPER_PSR_CONFIG \ + (PIXPAPER_PSR_RST_N | PIXPAPER_PSR_SHD_N | PIXPAPER_PSR_SHL | \ + PIXPAPER_PSR_UD) +/* 0x0F: Default settings, resolution set by R61H */ + +/* R00H PSR - Second Parameter */ +#define PIXPAPER_PSR2_VC_LUTZ \ + (1 << 0) /* Bit 0: VC_LUTZ, 1=VCOM float after refresh (default), 0=no effect */ +#define PIXPAPER_PSR2_NORG \ + (1 << 1) /* Bit 1: NORG, 1=VCOM to GND before power off, 0=no effect (default) */ +#define PIXPAPER_PSR2_TIEG \ + (1 << 2) /* Bit 2: TIEG, 1=VGN to GND on power off, 0=no effect (default) */ +#define PIXPAPER_PSR2_TS_AUTO \ + (1 << 3) /* Bit 3: TS_AUTO, 1=sensor on RST_N low to high (default), 0=on booster */ +#define PIXPAPER_PSR2_VCMZ \ + (1 << 4) /* Bit 4: VCMZ, 1=VCOM always floating, 0=no effect (default) */ +#define PIXPAPER_PSR2_FOPT \ + (1 << 5) /* Bit 5: FOPT, 0=scan 1 frame (default), 1=no scan, HiZ */ +#define PIXPAPER_PSR_CONFIG2 \ + (PIXPAPER_PSR2_VC_LUTZ | \ + PIXPAPER_PSR2_TS_AUTO) /* 0x09: Default VCOM and temp sensor settings */ + +/* R01H PWR - Power Setting Register */ +/* First Parameter */ +#define PIXPAPER_PWR_VDG_EN \ + (1 << 0) /* Bit 0: VDG_EN, 1=internal 
DCDC for VGP/VGN (default), 0=external */ +#define PIXPAPER_PWR_VDS_EN \ + (1 << 1) /* Bit 1: VDS_EN, 1=internal regulator for VSP/VSN (default), 0=external */ +#define PIXPAPER_PWR_VSC_EN \ + (1 << 2) /* Bit 2: VSC_EN, 1=internal regulator for VSPL (default), 0=external */ +#define PIXPAPER_PWR_V_MODE \ + (1 << 3) /* Bit 3: V_MODE, 0=Mode0 (default), 1=Mode1 */ +#define PIXPAPER_PWR_CONFIG1 \ + (PIXPAPER_PWR_VDG_EN | PIXPAPER_PWR_VDS_EN | \ + PIXPAPER_PWR_VSC_EN) /* 0x07: Internal power for VGP/VGN, VSP/VSN, VSPL */ + +/* Second Parameter */ +#define PIXPAPER_PWR_VGPN_MASK \ + (3 << 0) /* Bits 1-0: VGPN, VGP/VGN voltage levels */ +#define PIXPAPER_PWR_VGPN_20V (0x0 << 0) /* 00: VGP=20V, VGN=-20V (default) */ +#define PIXPAPER_PWR_VGPN_17V (0x1 << 0) /* 01: VGP=17V, VGN=-17V */ +#define PIXPAPER_PWR_VGPN_15V (0x2 << 0) /* 10: VGP=15V, VGN=-15V */ +#define PIXPAPER_PWR_VGPN_10V (0x3 << 0) /* 11: VGP=10V, VGN=-10V */ +#define PIXPAPER_PWR_CONFIG2 PIXPAPER_PWR_VGPN_20V /* 0x00: VGP=20V, VGN=-20V */ + +/* Third, Fourth, Sixth Parameters (VSP_1, VSPL_0, VSPL_1) */ +#define PIXPAPER_PWR_VSP_8_2V 0x22 /* VSP_1/VSPL_1: 8.2V (34 decimal) */ +#define PIXPAPER_PWR_VSPL_15V 0x78 /* VSPL_0: 15V (120 decimal) */ + +/* Fifth Parameter (VSN_1) */ +#define PIXPAPER_PWR_VSN_4V 0x0A /* VSN_1: -4V (10 decimal) */ + +/* R03H PFS - Power Off Sequence Setting Register */ +/* First Parameter */ +#define PIXPAPER_PFS_T_VDS_OFF_MASK \ + (3 << 0) /* Bits 1-0: T_VDS_OFF, VSP/VSN power-off sequence */ +#define PIXPAPER_PFS_T_VDS_OFF_20MS (0x0 << 0) /* 00: 20 ms (default) */ +#define PIXPAPER_PFS_T_VDS_OFF_40MS (0x1 << 0) /* 01: 40 ms */ +#define PIXPAPER_PFS_T_VDS_OFF_60MS (0x2 << 0) /* 10: 60 ms */ +#define PIXPAPER_PFS_T_VDS_OFF_80MS (0x3 << 0) /* 11: 80 ms */ +#define PIXPAPER_PFS_T_VDPG_OFF_MASK \ + (3 << 4) /* Bits 5-4: T_VDPG_OFF, VGP/VGN power-off sequence */ +#define PIXPAPER_PFS_T_VDPG_OFF_20MS (0x0 << 4) /* 00: 20 ms (default) */ +#define PIXPAPER_PFS_T_VDPG_OFF_40MS (0x1 << 4) /* 01: 40 ms */ +#define PIXPAPER_PFS_T_VDPG_OFF_60MS (0x2 << 4) /* 10: 60 ms */ +#define PIXPAPER_PFS_T_VDPG_OFF_80MS (0x3 << 4) /* 11: 80 ms */ +#define PIXPAPER_PFS_CONFIG1 \ + (PIXPAPER_PFS_T_VDS_OFF_20MS | \ + PIXPAPER_PFS_T_VDPG_OFF_20MS) /* 0x10: Default 20 ms for VSP/VSN and VGP/VGN */ + +/* Second Parameter */ +#define PIXPAPER_PFS_VGP_EXT_MASK \ + (0xF << 0) /* Bits 3-0: VGP_EXT, VGP extension time */ +#define PIXPAPER_PFS_VGP_EXT_0MS (0x0 << 0) /* 0000: 0 ms */ +#define PIXPAPER_PFS_VGP_EXT_500MS (0x1 << 0) /* 0001: 500 ms */ +#define PIXPAPER_PFS_VGP_EXT_1000MS (0x2 << 0) /* 0010: 1000 ms */ +#define PIXPAPER_PFS_VGP_EXT_1500MS (0x3 << 0) /* 0011: 1500 ms */ +#define PIXPAPER_PFS_VGP_EXT_2000MS (0x4 << 0) /* 0100: 2000 ms (default) */ +#define PIXPAPER_PFS_VGP_EXT_2500MS (0x5 << 0) /* 0101: 2500 ms */ +#define PIXPAPER_PFS_VGP_EXT_3000MS (0x6 << 0) /* 0110: 3000 ms */ +#define PIXPAPER_PFS_VGP_EXT_3500MS (0x7 << 0) /* 0111: 3500 ms */ +#define PIXPAPER_PFS_VGP_EXT_4000MS (0x8 << 0) /* 1000: 4000 ms */ +#define PIXPAPER_PFS_VGP_EXT_4500MS (0x9 << 0) /* 1001: 4500 ms */ +#define PIXPAPER_PFS_VGP_EXT_5000MS (0xA << 0) /* 1010: 5000 ms */ +#define PIXPAPER_PFS_VGP_EXT_5500MS (0xB << 0) /* 1011: 5500 ms */ +#define PIXPAPER_PFS_VGP_EXT_6000MS (0xC << 0) /* 1100: 6000 ms */ +#define PIXPAPER_PFS_VGP_EXT_6500MS (0xD << 0) /* 1101: 6500 ms */ +#define PIXPAPER_PFS_VGP_LEN_MASK \ + (0xF << 4) /* Bits 7-4: VGP_LEN, VGP at 10V during power-off */ +#define PIXPAPER_PFS_VGP_LEN_0MS (0x0 << 4) /* 0000: 0 ms */ +#define 
PIXPAPER_PFS_VGP_LEN_500MS (0x1 << 4) /* 0001: 500 ms */ +#define PIXPAPER_PFS_VGP_LEN_1000MS (0x2 << 4) /* 0010: 1000 ms */ +#define PIXPAPER_PFS_VGP_LEN_1500MS (0x3 << 4) /* 0011: 1500 ms */ +#define PIXPAPER_PFS_VGP_LEN_2000MS (0x4 << 4) /* 0100: 2000 ms */ +#define PIXPAPER_PFS_VGP_LEN_2500MS (0x5 << 4) /* 0101: 2500 ms (default) */ +#define PIXPAPER_PFS_VGP_LEN_3000MS (0x6 << 4) /* 0110: 3000 ms */ +#define PIXPAPER_PFS_VGP_LEN_3500MS (0x7 << 4) /* 0111: 3500 ms */ +#define PIXPAPER_PFS_VGP_LEN_4000MS (0x8 << 4) /* 1000: 4000 ms */ +#define PIXPAPER_PFS_VGP_LEN_4500MS (0x9 << 4) /* 1001: 4500 ms */ +#define PIXPAPER_PFS_VGP_LEN_5000MS (0xA << 4) /* 1010: 5000 ms */ +#define PIXPAPER_PFS_VGP_LEN_5500MS (0xB << 4) /* 1011: 5500 ms */ +#define PIXPAPER_PFS_VGP_LEN_6000MS (0xC << 4) /* 1100: 6000 ms */ +#define PIXPAPER_PFS_VGP_LEN_6500MS (0xD << 4) /* 1101: 6500 ms */ +#define PIXPAPER_PFS_CONFIG2 \ + (PIXPAPER_PFS_VGP_EXT_1000MS | \ + PIXPAPER_PFS_VGP_LEN_2500MS) /* 0x54: VGP extension 1000 ms, VGP at 10V for 2500 ms */ + +/* Third Parameter */ +#define PIXPAPER_PFS_XON_LEN_MASK \ + (0xF << 0) /* Bits 3-0: XON_LEN, XON enable time */ +#define PIXPAPER_PFS_XON_LEN_0MS (0x0 << 0) /* 0000: 0 ms */ +#define PIXPAPER_PFS_XON_LEN_500MS (0x1 << 0) /* 0001: 500 ms */ +#define PIXPAPER_PFS_XON_LEN_1000MS (0x2 << 0) /* 0010: 1000 ms */ +#define PIXPAPER_PFS_XON_LEN_1500MS (0x3 << 0) /* 0011: 1500 ms */ +#define PIXPAPER_PFS_XON_LEN_2000MS (0x4 << 0) /* 0100: 2000 ms (default) */ +#define PIXPAPER_PFS_XON_LEN_2500MS (0x5 << 0) /* 0101: 2500 ms */ +#define PIXPAPER_PFS_XON_LEN_3000MS (0x6 << 0) /* 0110: 3000 ms */ +#define PIXPAPER_PFS_XON_LEN_3500MS (0x7 << 0) /* 0111: 3500 ms */ +#define PIXPAPER_PFS_XON_LEN_4000MS (0x8 << 0) /* 1000: 4000 ms */ +#define PIXPAPER_PFS_XON_LEN_4500MS (0x9 << 0) /* 1001: 4500 ms */ +#define PIXPAPER_PFS_XON_LEN_5000MS (0xA << 0) /* 1010: 5000 ms */ +#define PIXPAPER_PFS_XON_LEN_5500MS (0xB << 0) /* 1011: 5500 ms */ +#define PIXPAPER_PFS_XON_LEN_6000MS (0xC << 0) /* 1100: 6000 ms */ +#define PIXPAPER_PFS_XON_DLY_MASK \ + (0xF << 4) /* Bits 7-4: XON_DLY, XON delay time */ +#define PIXPAPER_PFS_XON_DLY_0MS (0x0 << 4) /* 0000: 0 ms */ +#define PIXPAPER_PFS_XON_DLY_500MS (0x1 << 4) /* 0001: 500 ms */ +#define PIXPAPER_PFS_XON_DLY_1000MS (0x2 << 4) /* 0010: 1000 ms */ +#define PIXPAPER_PFS_XON_DLY_1500MS (0x3 << 4) /* 0011: 1500 ms */ +#define PIXPAPER_PFS_XON_DLY_2000MS (0x4 << 4) /* 0100: 2000 ms (default) */ +#define PIXPAPER_PFS_XON_DLY_2500MS (0x5 << 4) /* 0101: 2500 ms */ +#define PIXPAPER_PFS_XON_DLY_3000MS (0x6 << 4) /* 0110: 3000 ms */ +#define PIXPAPER_PFS_XON_DLY_3500MS (0x7 << 4) /* 0111: 3500 ms */ +#define PIXPAPER_PFS_XON_DLY_4000MS (0x8 << 4) /* 1000: 4000 ms */ +#define PIXPAPER_PFS_XON_DLY_4500MS (0x9 << 4) /* 1001: 4500 ms */ +#define PIXPAPER_PFS_XON_DLY_5000MS (0xA << 4) /* 1010: 5000 ms */ +#define PIXPAPER_PFS_XON_DLY_5500MS (0xB << 4) /* 1011: 5500 ms */ +#define PIXPAPER_PFS_XON_DLY_6000MS (0xC << 4) /* 1100: 6000 ms */ +#define PIXPAPER_PFS_CONFIG3 \ + (PIXPAPER_PFS_XON_LEN_2000MS | \ + PIXPAPER_PFS_XON_DLY_2000MS) /* 0x44: XON enable and delay at 2000 ms */ + +/* R06H BTST - Booster Soft Start Command */ +/* First Parameter */ +#define PIXPAPER_BTST_PHA_SFT_MASK \ + (3 << 0) /* Bits 1-0: PHA_SFT, soft start period for phase A */ +#define PIXPAPER_BTST_PHA_SFT_10MS (0x0 << 0) /* 00: 10 ms (default) */ +#define PIXPAPER_BTST_PHA_SFT_20MS (0x1 << 0) /* 01: 20 ms */ +#define PIXPAPER_BTST_PHA_SFT_30MS (0x2 << 0) /* 10: 30 ms */ +#define 
PIXPAPER_BTST_PHA_SFT_40MS (0x3 << 0) /* 11: 40 ms */ +#define PIXPAPER_BTST_PHB_SFT_MASK \ + (3 << 2) /* Bits 3-2: PHB_SFT, soft start period for phase B */ +#define PIXPAPER_BTST_PHB_SFT_10MS (0x0 << 2) /* 00: 10 ms (default) */ +#define PIXPAPER_BTST_PHB_SFT_20MS (0x1 << 2) /* 01: 20 ms */ +#define PIXPAPER_BTST_PHB_SFT_30MS (0x2 << 2) /* 10: 30 ms */ +#define PIXPAPER_BTST_PHB_SFT_40MS (0x3 << 2) /* 11: 40 ms */ +#define PIXPAPER_BTST_CONFIG1 \ + (PIXPAPER_BTST_PHA_SFT_40MS | \ + PIXPAPER_BTST_PHB_SFT_40MS) /* 0x0F: 40 ms for phase A and B */ + +/* Second to Seventh Parameters (Driving Strength or Minimum OFF Time) */ +#define PIXPAPER_BTST_CONFIG2 0x0A /* Strength11 */ +#define PIXPAPER_BTST_CONFIG3 0x2F /* Period48 */ +#define PIXPAPER_BTST_CONFIG4 0x25 /* Strength38 */ +#define PIXPAPER_BTST_CONFIG5 0x22 /* Period35 */ +#define PIXPAPER_BTST_CONFIG6 0x2E /* Strength47 */ +#define PIXPAPER_BTST_CONFIG7 0x21 /* Period34 */ + +/* R12H: DRF (Display Refresh) */ +#define PIXPAPER_DRF_VCOM_AC 0x00 /* AC VCOM: VCOM follows LUTC (default) */ +#define PIXPAPER_DRF_VCOM_DC 0x01 /* DC VCOM: VCOM fixed to VCOMDC */ + +/* R30H PLL - PLL Control Register */ +/* First Parameter */ +#define PIXPAPER_PLL_FR_MASK (0x7 << 0) /* Bits 2-0: FR, frame rate */ +#define PIXPAPER_PLL_FR_12_5HZ (0x0 << 0) /* 000: 12.5 Hz */ +#define PIXPAPER_PLL_FR_25HZ (0x1 << 0) /* 001: 25 Hz */ +#define PIXPAPER_PLL_FR_50HZ (0x2 << 0) /* 010: 50 Hz (default) */ +#define PIXPAPER_PLL_FR_65HZ (0x3 << 0) /* 011: 65 Hz */ +#define PIXPAPER_PLL_FR_75HZ (0x4 << 0) /* 100: 75 Hz */ +#define PIXPAPER_PLL_FR_85HZ (0x5 << 0) /* 101: 85 Hz */ +#define PIXPAPER_PLL_FR_100HZ (0x6 << 0) /* 110: 100 Hz */ +#define PIXPAPER_PLL_FR_120HZ (0x7 << 0) /* 111: 120 Hz */ +#define PIXPAPER_PLL_DFR \ + (1 << 3) /* Bit 3: Dynamic frame rate, 0=disabled (default), 1=enabled */ +#define PIXPAPER_PLL_CONFIG \ + (PIXPAPER_PLL_FR_50HZ) /* 0x02: 50 Hz, dynamic frame rate disabled */ + +/* R41H TSE - Temperature Sensor Calibration Register */ +/* First Parameter */ +#define PIXPAPER_TSE_TO_MASK \ + (0xF << 0) /* Bits 3-0: TO[3:0], temperature offset */ +#define PIXPAPER_TSE_TO_POS_0C (0x0 << 0) /* 0000: +0°C (default) */ +#define PIXPAPER_TSE_TO_POS_0_5C (0x1 << 0) /* 0001: +0.5°C */ +#define PIXPAPER_TSE_TO_POS_1C (0x2 << 0) /* 0010: +1°C */ +#define PIXPAPER_TSE_TO_POS_1_5C (0x3 << 0) /* 0011: +1.5°C */ +#define PIXPAPER_TSE_TO_POS_2C (0x4 << 0) /* 0100: +2°C */ +#define PIXPAPER_TSE_TO_POS_2_5C (0x5 << 0) /* 0101: +2.5°C */ +#define PIXPAPER_TSE_TO_POS_3C (0x6 << 0) /* 0110: +3°C */ +#define PIXPAPER_TSE_TO_POS_3_5C (0x7 << 0) /* 0111: +3.5°C */ +#define PIXPAPER_TSE_TO_NEG_4C (0x8 << 0) /* 1000: -4°C */ +#define PIXPAPER_TSE_TO_NEG_3_5C (0x9 << 0) /* 1001: -3.5°C */ +#define PIXPAPER_TSE_TO_NEG_3C (0xA << 0) /* 1010: -3°C */ +#define PIXPAPER_TSE_TO_NEG_2_5C (0xB << 0) /* 1011: -2.5°C */ +#define PIXPAPER_TSE_TO_NEG_2C (0xC << 0) /* 1100: -2°C */ +#define PIXPAPER_TSE_TO_NEG_1_5C (0xD << 0) /* 1101: -1.5°C */ +#define PIXPAPER_TSE_TO_NEG_1C (0xE << 0) /* 1110: -1°C */ +#define PIXPAPER_TSE_TO_NEG_0_5C (0xF << 0) /* 1111: -0.5°C */ +#define PIXPAPER_TSE_TO_FINE_MASK \ + (0x3 << 4) /* Bits 5-4: TO[5:4], fine adjustment for positive offsets */ +#define PIXPAPER_TSE_TO_FINE_0C (0x0 << 4) /* 00: +0.0°C (default) */ +#define PIXPAPER_TSE_TO_FINE_0_25C (0x1 << 4) /* 01: +0.25°C */ +#define PIXPAPER_TSE_ENABLE \ + (0 << 7) /* Bit 7: TSE, 0=internal sensor enabled (default), 1=disabled (external) */ +#define PIXPAPER_TSE_DISABLE \ + (1 << 7) /* Bit 7: TSE, 
1=internal sensor disabled, use external */ +#define PIXPAPER_TSE_CONFIG \ + (PIXPAPER_TSE_TO_POS_0C | PIXPAPER_TSE_TO_FINE_0C | \ + PIXPAPER_TSE_ENABLE) /* 0x00: Internal sensor enabled, +0°C offset */ + +/* R4DH */ +#define PIXPAPER_UNKNOWN_4D_CONFIG \ + 0x78 /* This value is essential for initialization, derived from userspace examples. */ + +/* R50H CDI - VCOM and DATA Interval Setting Register */ +/* First Parameter */ +#define PIXPAPER_CDI_INTERVAL_MASK \ + (0xF << 0) /* Bits 3-0: CDI[3:0], VCOM and data interval (hsync) */ +#define PIXPAPER_CDI_17_HSYNC (0x0 << 0) /* 0000: 17 hsync */ +#define PIXPAPER_CDI_16_HSYNC (0x1 << 0) /* 0001: 16 hsync */ +#define PIXPAPER_CDI_15_HSYNC (0x2 << 0) /* 0010: 15 hsync */ +#define PIXPAPER_CDI_14_HSYNC (0x3 << 0) /* 0011: 14 hsync */ +#define PIXPAPER_CDI_13_HSYNC (0x4 << 0) /* 0100: 13 hsync */ +#define PIXPAPER_CDI_12_HSYNC (0x5 << 0) /* 0101: 12 hsync */ +#define PIXPAPER_CDI_11_HSYNC (0x6 << 0) /* 0110: 11 hsync */ +#define PIXPAPER_CDI_10_HSYNC (0x7 << 0) /* 0111: 10 hsync (default) */ +#define PIXPAPER_CDI_9_HSYNC (0x8 << 0) /* 1000: 9 hsync */ +#define PIXPAPER_CDI_8_HSYNC (0x9 << 0) /* 1001: 8 hsync */ +#define PIXPAPER_CDI_7_HSYNC (0xA << 0) /* 1010: 7 hsync */ +#define PIXPAPER_CDI_6_HSYNC (0xB << 0) /* 1011: 6 hsync */ +#define PIXPAPER_CDI_5_HSYNC (0xC << 0) /* 1100: 5 hsync */ +#define PIXPAPER_CDI_4_HSYNC (0xD << 0) /* 1101: 4 hsync */ +#define PIXPAPER_CDI_3_HSYNC (0xE << 0) /* 1110: 3 hsync */ +#define PIXPAPER_CDI_2_HSYNC (0xF << 0) /* 1111: 2 hsync */ +#define PIXPAPER_CDI_DDX \ + (1 << 4) /* Bit 4: DDX, 0=grayscale mapping 0, 1=grayscale mapping 1 (default) */ +#define PIXPAPER_CDI_VBD_MASK \ + (0x7 << 5) /* Bits 7-5: VBD[2:0], border data selection */ +#define PIXPAPER_CDI_VBD_FLOAT (0x0 << 5) /* 000: Floating (DDX=0 or 1) */ +#define PIXPAPER_CDI_VBD_GRAY3_DDX0 \ + (0x1 << 5) /* 001: Gray3 (border_buf=011) when DDX=0 */ +#define PIXPAPER_CDI_VBD_GRAY2_DDX0 \ + (0x2 << 5) /* 010: Gray2 (border_buf=010) when DDX=0 */ +#define PIXPAPER_CDI_VBD_GRAY1_DDX0 \ + (0x3 << 5) /* 011: Gray1 (border_buf=001) when DDX=0 */ +#define PIXPAPER_CDI_VBD_GRAY0_DDX0 \ + (0x4 << 5) /* 100: Gray0 (border_buf=000) when DDX=0 */ +#define PIXPAPER_CDI_VBD_GRAY0_DDX1 \ + (0x0 << 5) /* 000: Gray0 (border_buf=000) when DDX=1 */ +#define PIXPAPER_CDI_VBD_GRAY1_DDX1 \ + (0x1 << 5) /* 001: Gray1 (border_buf=001) when DDX=1 */ +#define PIXPAPER_CDI_VBD_GRAY2_DDX1 \ + (0x2 << 5) /* 010: Gray2 (border_buf=010) when DDX=1 */ +#define PIXPAPER_CDI_VBD_GRAY3_DDX1 \ + (0x3 << 5) /* 011: Gray3 (border_buf=011) when DDX=1 */ +#define PIXPAPER_CDI_VBD_FLOAT_DDX1 (0x4 << 5) /* 100: Floating when DDX=1 */ +#define PIXPAPER_CDI_CONFIG \ + (PIXPAPER_CDI_10_HSYNC | PIXPAPER_CDI_DDX | \ + PIXPAPER_CDI_VBD_GRAY1_DDX1) /* 0x37: 10 hsync, DDX=1, border Gray1 */ + +/* R60H */ +#define PIXPAPER_UNKNOWN_60_CONFIG1 \ + 0x02 /* This value is essential for initialization, derived from userspace examples. */ +#define PIXPAPER_UNKNOWN_60_CONFIG2 \ + 0x02 /* This value is essential for initialization, derived from userspace examples. 
*/ + +/* R61H TRES - Resolution Setting Register */ +#define PIXPAPER_TRES_HRES_H \ + ((PIXPAPER_PANEL_BUFFER_WIDTH >> 8) & \ + 0xFF) /* HRES[9:8]: High byte of horizontal resolution (128) */ +#define PIXPAPER_TRES_HRES_L \ + (PIXPAPER_PANEL_BUFFER_WIDTH & \ + 0xFF) /* HRES[7:0]: Low byte of horizontal resolution (128 = 0x80) */ +#define PIXPAPER_TRES_VRES_H \ + ((PIXPAPER_HEIGHT >> 8) & \ + 0xFF) /* VRES[9:8]: High byte of vertical resolution (250) */ +#define PIXPAPER_TRES_VRES_L \ + (PIXPAPER_HEIGHT & \ + 0xFF) /* VRES[7:0]: Low byte of vertical resolution (250 = 0xFA) */ + +/* R65H GSST - Gate/Source Start Setting Register */ +#define PIXPAPER_GSST_S_START 0x00 /* S_Start[7:0]: First source line (S0) */ +#define PIXPAPER_GSST_RESERVED 0x00 /* Reserved byte */ +#define PIXPAPER_GSST_G_START_H \ + 0x00 /* G_Start[8]: High bit of first gate line (G0) */ +#define PIXPAPER_GSST_G_START_L \ + 0x00 /* G_Start[7:0]: Low byte of first gate line (G0) */ + +/* RB4H */ +#define PIXPAPER_UNKNOWN_B4_CONFIG \ + 0xD0 /* This value is essential for initialization, derived from userspace examples. */ + +/* RB5H */ +#define PIXPAPER_UNKNOWN_B5_CONFIG \ + 0x03 /* This value is essential for initialization, derived from userspace examples. */ + +/* RE0H */ +#define PIXPAPER_UNKNOWN_E0_CONFIG \ + 0x00 /* This value is essential for initialization, derived from userspace examples. */ + +/* RE3H PWS - Power Saving Register */ +/* First Parameter */ +#define PIXPAPER_PWS_VCOM_W_MASK \ + (0xF \ + << 4) /* Bits 7-4: VCOM_W[3:0], VCOM power-saving width (line periods) */ +#define PIXPAPER_PWS_VCOM_W_0 (0x0 << 4) /* 0000: 0 line periods */ +#define PIXPAPER_PWS_VCOM_W_1 (0x1 << 4) /* 0001: 1 line period */ +#define PIXPAPER_PWS_VCOM_W_2 (0x2 << 4) /* 0010: 2 line periods */ +#define PIXPAPER_PWS_VCOM_W_3 (0x3 << 4) /* 0011: 3 line periods */ +#define PIXPAPER_PWS_VCOM_W_4 (0x4 << 4) /* 0100: 4 line periods */ +#define PIXPAPER_PWS_VCOM_W_5 (0x5 << 4) /* 0101: 5 line periods */ +#define PIXPAPER_PWS_VCOM_W_6 (0x6 << 4) /* 0110: 6 line periods */ +#define PIXPAPER_PWS_VCOM_W_7 (0x7 << 4) /* 0111: 7 line periods */ +#define PIXPAPER_PWS_VCOM_W_8 (0x8 << 4) /* 1000: 8 line periods */ +#define PIXPAPER_PWS_VCOM_W_9 (0x9 << 4) /* 1001: 9 line periods */ +#define PIXPAPER_PWS_VCOM_W_10 (0xA << 4) /* 1010: 10 line periods */ +#define PIXPAPER_PWS_VCOM_W_11 (0xB << 4) /* 1011: 11 line periods */ +#define PIXPAPER_PWS_VCOM_W_12 (0xC << 4) /* 1100: 12 line periods */ +#define PIXPAPER_PWS_VCOM_W_13 (0xD << 4) /* 1101: 13 line periods */ +#define PIXPAPER_PWS_VCOM_W_14 (0xE << 4) /* 1110: 14 line periods */ +#define PIXPAPER_PWS_VCOM_W_15 (0xF << 4) /* 1111: 15 line periods */ +#define PIXPAPER_PWS_SD_W_MASK \ + (0xF << 0) /* Bits 3-0: SD_W[3:0], source power-saving width (660 ns units) */ +#define PIXPAPER_PWS_SD_W_0 (0x0 << 0) /* 0000: 0 ns */ +#define PIXPAPER_PWS_SD_W_1 (0x1 << 0) /* 0001: 660 ns */ +#define PIXPAPER_PWS_SD_W_2 (0x2 << 0) /* 0010: 1320 ns */ +#define PIXPAPER_PWS_SD_W_3 (0x3 << 0) /* 0011: 1980 ns */ +#define PIXPAPER_PWS_SD_W_4 (0x4 << 0) /* 0100: 2640 ns */ +#define PIXPAPER_PWS_SD_W_5 (0x5 << 0) /* 0101: 3300 ns */ +#define PIXPAPER_PWS_SD_W_6 (0x6 << 0) /* 0110: 3960 ns */ +#define PIXPAPER_PWS_SD_W_7 (0x7 << 0) /* 0111: 4620 ns */ +#define PIXPAPER_PWS_SD_W_8 (0x8 << 0) /* 1000: 5280 ns */ +#define PIXPAPER_PWS_SD_W_9 (0x9 << 0) /* 1001: 5940 ns */ +#define PIXPAPER_PWS_SD_W_10 (0xA << 0) /* 1010: 6600 ns */ +#define PIXPAPER_PWS_SD_W_11 (0xB << 0) /* 1011: 7260 ns */ +#define PIXPAPER_PWS_SD_W_12 
(0xC << 0) /* 1100: 7920 ns */ +#define PIXPAPER_PWS_SD_W_13 (0xD << 0) /* 1101: 8580 ns */ +#define PIXPAPER_PWS_SD_W_14 (0xE << 0) /* 1110: 9240 ns */ +#define PIXPAPER_PWS_SD_W_15 (0xF << 0) /* 1111: 9900 ns */ +#define PIXPAPER_PWS_CONFIG \ + (PIXPAPER_PWS_VCOM_W_2 | \ + PIXPAPER_PWS_SD_W_2) /* 0x22: VCOM 2 line periods (160 µs), source 1320 ns */ + +/* RE7H */ +#define PIXPAPER_UNKNOWN_E7_CONFIG \ + 0x1C /* This value is essential for initialization, derived from userspace examples. */ + +/* RE9H */ +#define PIXPAPER_UNKNOWN_E9_CONFIG \ + 0x01 /* This value is essential for initialization, derived from userspace examples. */ + +MODULE_IMPORT_NS("DMA_BUF"); + +/* + * The panel has a visible resolution of 122x250. + * However, the controller requires the horizontal resolution to be aligned to 128 pixels. + * No porch or sync timing values are provided in the datasheet, so we define minimal + * placeholder values to satisfy the DRM framework. + */ + +/* Panel visible resolution */ +#define PIXPAPER_WIDTH 122 +#define PIXPAPER_HEIGHT 250 + +/* Controller requires 128 horizontal pixels total (for memory alignment) */ +#define PIXPAPER_HTOTAL 128 +#define PIXPAPER_HFP 2 +#define PIXPAPER_HSYNC 2 +#define PIXPAPER_HBP (PIXPAPER_HTOTAL - PIXPAPER_WIDTH - PIXPAPER_HFP - PIXPAPER_HSYNC) + +/* + * According to the datasheet, the total vertical blanking must be 55 lines, + * regardless of how the vertical back porch is set. + * Here we allocate VFP=2, VSYNC=2, and VBP=51 to sum up to 55 lines. + * Total vertical lines = 250 (visible) + 55 (blanking) = 305. + */ +#define PIXPAPER_VTOTAL (250 + 55) +#define PIXPAPER_VFP 2 +#define PIXPAPER_VSYNC 2 +#define PIXPAPER_VBP (55 - PIXPAPER_VFP - PIXPAPER_VSYNC) + +/* + * Pixel clock calculation: + * pixel_clock = htotal * vtotal * refresh_rate + * = 128 * 305 * 50 + * = 1,952,000 Hz = 1952 kHz + */ +#define PIXPAPER_PIXEL_CLOCK 1952 + +#define PIXPAPER_WIDTH_MM 24 /* approximate from 23.7046mm */ +#define PIXPAPER_HEIGHT_MM 49 /* approximate from 48.55mm */ + +#define PIXPAPER_SPI_BITS_PER_WORD 8 +#define PIXPAPER_SPI_SPEED_DEFAULT 1000000 + +#define PIXPAPER_PANEL_BUFFER_WIDTH 128 +#define PIXPAPER_PANEL_BUFFER_TWO_BYTES_PER_ROW (PIXPAPER_PANEL_BUFFER_WIDTH / 4) + +#define PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL 60 +#define PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL 200 +#define PIXPAPER_COLOR_THRESHOLD_YELLOW_MIN_GREEN 180 + +struct pixpaper_error_ctx { + int errno_code; +}; + +struct pixpaper_panel { + struct drm_device drm; + struct drm_plane plane; + struct drm_crtc crtc; + struct drm_encoder encoder; + struct drm_connector connector; + + struct spi_device *spi; + struct gpio_desc *reset; + struct gpio_desc *busy; + struct gpio_desc *dc; +}; + +static inline struct pixpaper_panel *to_pixpaper_panel(struct drm_device *drm) +{ + return container_of(drm, struct pixpaper_panel, drm); +} + +static void pixpaper_wait_for_panel(struct pixpaper_panel *panel) +{ + unsigned int timeout_ms = 10000; + unsigned long timeout_jiffies = jiffies + msecs_to_jiffies(timeout_ms); + + usleep_range(1000, 1500); + while (gpiod_get_value_cansleep(panel->busy) != 1) { + if (time_after(jiffies, timeout_jiffies)) { + drm_warn(&panel->drm, "Busy wait timed out\n"); + return; + } + usleep_range(100, 200); + } +} + +static void pixpaper_spi_sync(struct spi_device *spi, struct spi_message *msg, + struct pixpaper_error_ctx *err) +{ + if (err->errno_code) + return; + + int ret = spi_sync(spi, msg); + + if (ret < 0) + err->errno_code = ret; +} + +static void pixpaper_send_cmd(struct 
pixpaper_panel *panel, u8 cmd, + struct pixpaper_error_ctx *err) +{ + if (err->errno_code) + return; + + struct spi_transfer xfer = { + .tx_buf = &cmd, + .len = 1, + }; + struct spi_message msg; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + gpiod_set_value_cansleep(panel->dc, 0); + usleep_range(1, 5); + pixpaper_spi_sync(panel->spi, &msg, err); +} + +static void pixpaper_send_data(struct pixpaper_panel *panel, u8 data, + struct pixpaper_error_ctx *err) +{ + if (err->errno_code) + return; + + struct spi_transfer xfer = { + .tx_buf = &data, + .len = 1, + }; + struct spi_message msg; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + gpiod_set_value_cansleep(panel->dc, 1); + usleep_range(1, 5); + pixpaper_spi_sync(panel->spi, &msg, err); +} + +static int pixpaper_panel_hw_init(struct pixpaper_panel *panel) +{ + struct pixpaper_error_ctx err = { .errno_code = 0 }; + + gpiod_set_value_cansleep(panel->reset, 0); + msleep(50); + gpiod_set_value_cansleep(panel->reset, 1); + msleep(50); + + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_4D, &err); + pixpaper_send_data(panel, PIXPAPER_UNKNOWN_4D_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_PANEL_SETTING, &err); + pixpaper_send_data(panel, PIXPAPER_PSR_CONFIG, &err); + pixpaper_send_data(panel, PIXPAPER_PSR_CONFIG2, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_SETTING, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_CONFIG1, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_CONFIG2, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_VSP_8_2V, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_VSPL_15V, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_VSN_4V, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_VSP_8_2V, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_OFF_SEQUENCE, &err); + pixpaper_send_data(panel, PIXPAPER_PFS_CONFIG1, &err); + pixpaper_send_data(panel, PIXPAPER_PFS_CONFIG2, &err); + pixpaper_send_data(panel, PIXPAPER_PFS_CONFIG3, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_BOOSTER_SOFT_START, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG1, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG2, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG3, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG4, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG5, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG6, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG7, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_PLL_CONTROL, &err); + pixpaper_send_data(panel, PIXPAPER_PLL_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_TEMP_SENSOR_CALIB, &err); + pixpaper_send_data(panel, PIXPAPER_TSE_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_VCOM_INTERVAL, &err); + pixpaper_send_data(panel, PIXPAPER_CDI_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_60, &err); + pixpaper_send_data(panel, 
PIXPAPER_UNKNOWN_60_CONFIG1, &err);
+	pixpaper_send_data(panel, PIXPAPER_UNKNOWN_60_CONFIG2, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	pixpaper_send_cmd(panel, PIXPAPER_CMD_RESOLUTION_SETTING, &err);
+	pixpaper_send_data(panel, PIXPAPER_TRES_HRES_H, &err);
+	pixpaper_send_data(panel, PIXPAPER_TRES_HRES_L, &err);
+	pixpaper_send_data(panel, PIXPAPER_TRES_VRES_H, &err);
+	pixpaper_send_data(panel, PIXPAPER_TRES_VRES_L, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	pixpaper_send_cmd(panel, PIXPAPER_CMD_GATE_SOURCE_START, &err);
+	pixpaper_send_data(panel, PIXPAPER_GSST_S_START, &err);
+	pixpaper_send_data(panel, PIXPAPER_GSST_RESERVED, &err);
+	pixpaper_send_data(panel, PIXPAPER_GSST_G_START_H, &err);
+	pixpaper_send_data(panel, PIXPAPER_GSST_G_START_L, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_E7, &err);
+	pixpaper_send_data(panel, PIXPAPER_UNKNOWN_E7_CONFIG, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_SAVING, &err);
+	pixpaper_send_data(panel, PIXPAPER_PWS_CONFIG, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_E0, &err);
+	pixpaper_send_data(panel, PIXPAPER_UNKNOWN_E0_CONFIG, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_B4, &err);
+	pixpaper_send_data(panel, PIXPAPER_UNKNOWN_B4_CONFIG, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_B5, &err);
+	pixpaper_send_data(panel, PIXPAPER_UNKNOWN_B5_CONFIG, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_E9, &err);
+	pixpaper_send_data(panel, PIXPAPER_UNKNOWN_E9_CONFIG, &err);
+	if (err.errno_code)
+		goto init_fail;
+	pixpaper_wait_for_panel(panel);
+
+	return 0;
+
+init_fail:
+	drm_err(&panel->drm, "Hardware initialization failed (err=%d)\n",
+		err.errno_code);
+	return err.errno_code;
+}
+
+/*
+ * Convert framebuffer pixels to the panel's 2-bit format, matching the
+ * mapping implemented below:
+ * 00 - Black
+ * 01 - White
+ * 10 - Yellow
+ * 11 - Red
+ */
+static u8 pack_pixels_to_byte(__le32 *src_pixels, int i, int j,
+			      struct drm_framebuffer *fb)
+{
+	u8 packed_byte = 0;
+	int k;
+
+	for (k = 0; k < 4; k++) {
+		int current_pixel_x = j * 4 + k;
+		u8 two_bit_val;
+
+		if (current_pixel_x < PIXPAPER_WIDTH) {
+			u32 pixel_offset =
+				(i * (fb->pitches[0] / 4)) + current_pixel_x;
+			u32 pixel = le32_to_cpu(src_pixels[pixel_offset]);
+			u32 r = (pixel >> 16) & 0xFF;
+			u32 g = (pixel >> 8) & 0xFF;
+			u32 b = pixel & 0xFF;
+
+			if (r < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL &&
+			    g < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL &&
+			    b < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL) {
+				two_bit_val = 0b00;
+			} else if (r > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL &&
+				   g > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL &&
+				   b > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL) {
+				two_bit_val = 0b01;
+			} else if (r > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL &&
+				   g < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL &&
+				   b < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL) {
+				two_bit_val = 0b11;
+			} else if (r > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL &&
+				   g > PIXPAPER_COLOR_THRESHOLD_YELLOW_MIN_GREEN &&
+				   b < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL) {
+				two_bit_val = 0b10;
+			} else {
+				two_bit_val
= 0b01; + } + } else { + two_bit_val = 0b01; + } + + packed_byte |= two_bit_val << ((3 - k) * 2); + } + + return packed_byte; +} + +static int pixpaper_plane_helper_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = + drm_atomic_get_new_plane_state(state, plane); + struct drm_crtc *new_crtc = new_plane_state->crtc; + struct drm_crtc_state *new_crtc_state = NULL; + int ret; + + if (new_crtc) + new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); + + ret = drm_atomic_helper_check_plane_state(new_plane_state, + new_crtc_state, DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, false, false); + if (ret) + return ret; + else if (!new_plane_state->visible) + return 0; + + return 0; +} + +static int pixpaper_crtc_helper_atomic_check(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(state, crtc); + + if (!crtc_state->enable) + return 0; + + return drm_atomic_helper_check_crtc_primary_plane(crtc_state); +} + +static void pixpaper_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct pixpaper_panel *panel = to_pixpaper_panel(crtc->dev); + struct drm_device *drm = &panel->drm; + int idx; + struct pixpaper_error_ctx err = { .errno_code = 0 }; + + if (!drm_dev_enter(drm, &idx)) + return; + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_ON, &err); + if (err.errno_code) { + drm_err_once(drm, "Failed to send PON command: %d\n", err.errno_code); + goto exit_drm_dev; + } + + pixpaper_wait_for_panel(panel); + + drm_dbg(drm, "Panel enabled and powered on\n"); + +exit_drm_dev: + drm_dev_exit(idx); +} + +static void pixpaper_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct pixpaper_panel *panel = to_pixpaper_panel(crtc->dev); + struct drm_device *drm = &panel->drm; + struct pixpaper_error_ctx err = { .errno_code = 0 }; + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_OFF, &err); + if (err.errno_code) { + drm_err_once(drm, "Failed to send POF command: %d\n", err.errno_code); + goto exit_drm_dev; + } + pixpaper_wait_for_panel(panel); + + drm_dbg(drm, "Panel disabled\n"); + +exit_drm_dev: + drm_dev_exit(idx); +} + +static void pixpaper_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *plane_state = + drm_atomic_get_new_plane_state(state, plane); + struct drm_shadow_plane_state *shadow_plane_state = + to_drm_shadow_plane_state(plane_state); + struct drm_crtc *crtc = plane_state->crtc; + struct pixpaper_panel *panel = to_pixpaper_panel(crtc->dev); + + struct drm_device *drm = &panel->drm; + struct drm_framebuffer *fb = plane_state->fb; + struct iosys_map map = shadow_plane_state->data[0]; + void *vaddr = map.vaddr; + int i, j, idx; + __le32 *src_pixels = NULL; + struct pixpaper_error_ctx err = { .errno_code = 0 }; + + if (!drm_dev_enter(drm, &idx)) + return; + + drm_dbg(drm, "Starting frame update (phys=%dx%d, buf_w=%d)\n", + PIXPAPER_WIDTH, PIXPAPER_HEIGHT, PIXPAPER_PANEL_BUFFER_WIDTH); + + if (!fb || !plane_state->visible) { + drm_err_once(drm, "No framebuffer or plane not visible, skipping update\n"); + goto update_cleanup; + } + + src_pixels = (__le32 *)vaddr; + + pixpaper_send_cmd(panel, PIXPAPER_CMD_DATA_START_TRANSMISSION, &err); + if (err.errno_code) + goto update_cleanup; + + pixpaper_wait_for_panel(panel); + + for (i = 0; i < PIXPAPER_HEIGHT; i++) { + for (j = 0; j < 
PIXPAPER_PANEL_BUFFER_TWO_BYTES_PER_ROW; j++) { + u8 packed_byte = + pack_pixels_to_byte(src_pixels, i, j, fb); + + pixpaper_wait_for_panel(panel); + pixpaper_send_data(panel, packed_byte, &err); + } + } + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_ON, &err); + if (err.errno_code) { + drm_err_once(drm, "Failed to send PON command: %d\n", err.errno_code); + goto update_cleanup; + } + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_DISPLAY_REFRESH, &err); + pixpaper_send_data(panel, PIXPAPER_DRF_VCOM_AC, &err); + if (err.errno_code) { + drm_err_once(drm, "Failed sending data after DRF: %d\n", err.errno_code); + goto update_cleanup; + } + pixpaper_wait_for_panel(panel); + +update_cleanup: + if (err.errno_code && err.errno_code != -ETIMEDOUT) + drm_err_once(drm, "Frame update function failed with error %d\n", err.errno_code); + + drm_dev_exit(idx); +} + +static const struct drm_display_mode pixpaper_mode = { + .clock = PIXPAPER_PIXEL_CLOCK, + .hdisplay = PIXPAPER_WIDTH, + .hsync_start = PIXPAPER_WIDTH + PIXPAPER_HFP, + .hsync_end = PIXPAPER_WIDTH + PIXPAPER_HFP + PIXPAPER_HSYNC, + .htotal = PIXPAPER_HTOTAL, + .vdisplay = PIXPAPER_HEIGHT, + .vsync_start = PIXPAPER_HEIGHT + PIXPAPER_VFP, + .vsync_end = PIXPAPER_HEIGHT + PIXPAPER_VFP + PIXPAPER_VSYNC, + .vtotal = PIXPAPER_VTOTAL, + .width_mm = PIXPAPER_WIDTH_MM, + .height_mm = PIXPAPER_HEIGHT_MM, + .type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, +}; + +static int pixpaper_connector_get_modes(struct drm_connector *connector) +{ + return drm_connector_helper_get_modes_fixed(connector, &pixpaper_mode); +} + +static const struct drm_plane_funcs pixpaper_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + DRM_GEM_SHADOW_PLANE_FUNCS, +}; + +static const struct drm_plane_helper_funcs pixpaper_plane_helper_funcs = { + DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, + .atomic_check = pixpaper_plane_helper_atomic_check, + .atomic_update = pixpaper_plane_atomic_update, +}; + +static const struct drm_crtc_funcs pixpaper_crtc_funcs = { + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .destroy = drm_crtc_cleanup, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, +}; + +static int pixpaper_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + if (mode->hdisplay == PIXPAPER_WIDTH && + mode->vdisplay == PIXPAPER_HEIGHT) { + return MODE_OK; + } + return MODE_BAD; +} + +static const struct drm_crtc_helper_funcs pixpaper_crtc_helper_funcs = { + .mode_valid = pixpaper_mode_valid, + .atomic_check = pixpaper_crtc_helper_atomic_check, + .atomic_enable = pixpaper_crtc_atomic_enable, + .atomic_disable = pixpaper_crtc_atomic_disable, +}; + +static const struct drm_encoder_funcs pixpaper_encoder_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static const struct drm_connector_funcs pixpaper_connector_funcs = { + .reset = drm_atomic_helper_connector_reset, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const struct drm_connector_helper_funcs pixpaper_connector_helper_funcs = { + .get_modes = pixpaper_connector_get_modes, +}; + 
+DEFINE_DRM_GEM_FOPS(pixpaper_fops); + +static struct drm_driver pixpaper_drm_driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .fops = &pixpaper_fops, + .name = "pixpaper", + .desc = "DRM driver for PIXPAPER e-ink", + .major = 1, + .minor = 0, + DRM_GEM_SHMEM_DRIVER_OPS, + DRM_FBDEV_SHMEM_DRIVER_OPS, +}; + +static const struct drm_mode_config_funcs pixpaper_mode_config_funcs = { + .fb_create = drm_gem_fb_create_with_dirty, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static int pixpaper_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct pixpaper_panel *panel; + struct drm_device *drm; + int ret; + + panel = devm_drm_dev_alloc(dev, &pixpaper_drm_driver, + struct pixpaper_panel, drm); + if (IS_ERR(panel)) + return PTR_ERR(panel); + + drm = &panel->drm; + panel->spi = spi; + spi_set_drvdata(spi, panel); + + spi->mode = SPI_MODE_0; + spi->bits_per_word = PIXPAPER_SPI_BITS_PER_WORD; + + if (!spi->max_speed_hz) { + drm_warn(drm, + "spi-max-frequency not specified in DT, using default %u Hz\n", + PIXPAPER_SPI_SPEED_DEFAULT); + spi->max_speed_hz = PIXPAPER_SPI_SPEED_DEFAULT; + } + + ret = spi_setup(spi); + if (ret < 0) { + drm_err(drm, "SPI setup failed: %d\n", ret); + return ret; + } + + if (!dev->dma_mask) + dev->dma_mask = &dev->coherent_dma_mask; + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + drm_err(drm, "Failed to set DMA mask: %d\n", ret); + return ret; + } + + panel->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(panel->reset)) + return PTR_ERR(panel->reset); + + panel->busy = devm_gpiod_get(dev, "busy", GPIOD_IN); + if (IS_ERR(panel->busy)) + return PTR_ERR(panel->busy); + + panel->dc = devm_gpiod_get(dev, "dc", GPIOD_OUT_HIGH); + if (IS_ERR(panel->dc)) + return PTR_ERR(panel->dc); + + ret = pixpaper_panel_hw_init(panel); + if (ret) { + drm_err(drm, "Panel hardware initialization failed: %d\n", ret); + return ret; + } + + ret = drmm_mode_config_init(drm); + if (ret) + return ret; + drm->mode_config.funcs = &pixpaper_mode_config_funcs; + drm->mode_config.min_width = PIXPAPER_WIDTH; + drm->mode_config.max_width = PIXPAPER_WIDTH; + drm->mode_config.min_height = PIXPAPER_HEIGHT; + drm->mode_config.max_height = PIXPAPER_HEIGHT; + + ret = drm_universal_plane_init(drm, &panel->plane, 1, + &pixpaper_plane_funcs, + (const uint32_t[]){ DRM_FORMAT_XRGB8888 }, + 1, NULL, DRM_PLANE_TYPE_PRIMARY, NULL); + if (ret) + return ret; + drm_plane_helper_add(&panel->plane, &pixpaper_plane_helper_funcs); + + ret = drm_crtc_init_with_planes(drm, &panel->crtc, &panel->plane, NULL, + &pixpaper_crtc_funcs, NULL); + if (ret) + return ret; + drm_crtc_helper_add(&panel->crtc, &pixpaper_crtc_helper_funcs); + + ret = drm_encoder_init(drm, &panel->encoder, &pixpaper_encoder_funcs, + DRM_MODE_ENCODER_NONE, NULL); + if (ret) + return ret; + panel->encoder.possible_crtcs = drm_crtc_mask(&panel->crtc); + + ret = drm_connector_init(drm, &panel->connector, + &pixpaper_connector_funcs, + DRM_MODE_CONNECTOR_SPI); + if (ret) + return ret; + + drm_connector_helper_add(&panel->connector, + &pixpaper_connector_helper_funcs); + drm_connector_attach_encoder(&panel->connector, &panel->encoder); + + drm_mode_config_reset(drm); + + ret = drm_dev_register(drm, 0); + if (ret) + return ret; + + drm_client_setup(drm, NULL); + + return 0; +} + +static void pixpaper_remove(struct spi_device *spi) +{ + struct pixpaper_panel *panel = spi_get_drvdata(spi); + + if (!panel) + return; + + 
drm_dev_unplug(&panel->drm); + drm_atomic_helper_shutdown(&panel->drm); +} + +static const struct spi_device_id pixpaper_ids[] = { { "pixpaper", 0 }, {} }; +MODULE_DEVICE_TABLE(spi, pixpaper_ids); + +static const struct of_device_id pixpaper_dt_ids[] = { + { .compatible = "mayqueen,pixpaper" }, + {} +}; +MODULE_DEVICE_TABLE(of, pixpaper_dt_ids); + +static struct spi_driver pixpaper_spi_driver = { + .driver = { + .name = "pixpaper", + .of_match_table = pixpaper_dt_ids, + }, + .id_table = pixpaper_ids, + .probe = pixpaper_probe, + .remove = pixpaper_remove, +}; + +module_spi_driver(pixpaper_spi_driver); + +MODULE_AUTHOR("LiangCheng Wang"); +MODULE_DESCRIPTION("DRM SPI driver for PIXPAPER e-ink panel"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index f4d9e68b21e7..29423ceeec5c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1283,3 +1283,18 @@ int ttm_bo_populate(struct ttm_buffer_object *bo, return 0; } EXPORT_SYMBOL(ttm_bo_populate); + +int ttm_bo_setup_export(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx) +{ + int ret; + + ret = ttm_bo_reserve(bo, false, false, NULL); + if (ret != 0) + return ret; + + ret = ttm_bo_populate(bo, ctx); + ttm_bo_unreserve(bo); + return ret; +} +EXPORT_SYMBOL(ttm_bo_setup_export); diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 2def155ce496..c5a3bbbc74c5 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -157,12 +157,24 @@ v3d_open(struct drm_device *dev, struct drm_file *file) static void v3d_postclose(struct drm_device *dev, struct drm_file *file) { + struct v3d_dev *v3d = to_v3d_dev(dev); struct v3d_file_priv *v3d_priv = file->driver_priv; + unsigned long irqflags; enum v3d_queue q; - for (q = 0; q < V3D_MAX_QUEUES; q++) + for (q = 0; q < V3D_MAX_QUEUES; q++) { + struct v3d_queue_state *queue = &v3d->queue[q]; + struct v3d_job *job = queue->active_job; + drm_sched_entity_destroy(&v3d_priv->sched_entity[q]); + if (job && job->base.entity == &v3d_priv->sched_entity[q]) { + spin_lock_irqsave(&queue->queue_lock, irqflags); + job->file_priv = NULL; + spin_unlock_irqrestore(&queue->queue_lock, irqflags); + } + } + v3d_perfmon_close_file(v3d_priv); kfree(v3d_priv); } diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 82d84a96235f..0317f3d7452a 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -58,6 +58,10 @@ struct v3d_queue_state { /* Stores the GPU stats for this queue in the global context. */ struct v3d_stats stats; + + /* Currently active job for this queue */ + struct v3d_job *active_job; + spinlock_t queue_lock; }; /* Performance monitor object. The perform lifetime is controlled by userspace @@ -159,18 +163,8 @@ struct v3d_dev { struct work_struct overflow_mem_work; - struct v3d_bin_job *bin_job; - struct v3d_render_job *render_job; - struct v3d_tfu_job *tfu_job; - struct v3d_csd_job *csd_job; - struct v3d_queue_state queue[V3D_MAX_QUEUES]; - /* Spinlock used to synchronize the overflow memory - * management against bin job submission. - */ - spinlock_t job_lock; - /* Used to track the active perfmon if any. */ struct v3d_perfmon *active_perfmon; @@ -327,9 +321,9 @@ struct v3d_job { struct v3d_perfmon *perfmon; /* File descriptor of the process that submitted the job that could be used - * for collecting stats by process of GPU usage. + * to collect per-process information about the GPU. 
*/ - struct drm_file *file; + struct v3d_file_priv *file_priv; /* Callback for the freeing of the job on refcount going to 0. */ void (*free)(struct kref *ref); @@ -570,7 +564,7 @@ void v3d_get_stats(const struct v3d_stats *stats, u64 timestamp, /* v3d_fence.c */ extern const struct dma_fence_ops v3d_fence_ops; -struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue); +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q); /* v3d_gem.c */ int v3d_gem_init(struct drm_device *dev); @@ -614,7 +608,7 @@ void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, unsigned int count); void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, unsigned int count); -void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue); +void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q); int v3d_sched_init(struct v3d_dev *v3d); void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c index 89840ed212c0..8f8471adae34 100644 --- a/drivers/gpu/drm/v3d/v3d_fence.c +++ b/drivers/gpu/drm/v3d/v3d_fence.c @@ -3,8 +3,9 @@ #include "v3d_drv.h" -struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue) +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q) { + struct v3d_queue_state *queue = &v3d->queue[q]; struct v3d_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); @@ -12,10 +13,10 @@ struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue) return ERR_PTR(-ENOMEM); fence->dev = &v3d->drm; - fence->queue = queue; - fence->seqno = ++v3d->queue[queue].emit_seqno; - dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock, - v3d->queue[queue].fence_context, fence->seqno); + fence->queue = q; + fence->seqno = ++queue->emit_seqno; + dma_fence_init(&fence->base, &v3d_fence_ops, &queue->queue_lock, + queue->fence_context, fence->seqno); return &fence->base; } diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 37bf5eecdd2c..c77d90aa9b82 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -271,10 +271,11 @@ v3d_gem_init(struct drm_device *dev) queue->fence_context = dma_fence_context_alloc(1); memset(&queue->stats, 0, sizeof(queue->stats)); seqcount_init(&queue->stats.lock); + + spin_lock_init(&queue->queue_lock); } spin_lock_init(&v3d->mm_lock); - spin_lock_init(&v3d->job_lock); ret = drmm_mutex_init(dev, &v3d->bo_lock); if (ret) return ret; @@ -324,6 +325,7 @@ void v3d_gem_destroy(struct drm_device *dev) { struct v3d_dev *v3d = to_v3d_dev(dev); + enum v3d_queue q; v3d_sched_fini(v3d); v3d_gemfs_fini(v3d); @@ -331,10 +333,8 @@ v3d_gem_destroy(struct drm_device *dev) /* Waiting for jobs to finish would need to be done before * unregistering V3D. 
*/ - WARN_ON(v3d->bin_job); - WARN_ON(v3d->render_job); - WARN_ON(v3d->tfu_job); - WARN_ON(v3d->csd_job); + for (q = 0; q < V3D_MAX_QUEUES; q++) + WARN_ON(v3d->queue[q].active_job); drm_mm_takedown(&v3d->mm); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index a515a301e480..31ecc5b4ba5a 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -42,6 +42,8 @@ v3d_overflow_mem_work(struct work_struct *work) container_of(work, struct v3d_dev, overflow_mem_work); struct drm_device *dev = &v3d->drm; struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024); + struct v3d_queue_state *queue = &v3d->queue[V3D_BIN]; + struct v3d_bin_job *bin_job; struct drm_gem_object *obj; unsigned long irqflags; @@ -60,15 +62,17 @@ v3d_overflow_mem_work(struct work_struct *work) * bin job got scheduled, that's fine. We'll just give them * some binner pool anyway. */ - spin_lock_irqsave(&v3d->job_lock, irqflags); - if (!v3d->bin_job) { - spin_unlock_irqrestore(&v3d->job_lock, irqflags); + spin_lock_irqsave(&queue->queue_lock, irqflags); + bin_job = (struct v3d_bin_job *)queue->active_job; + + if (!bin_job) { + spin_unlock_irqrestore(&queue->queue_lock, irqflags); goto out; } drm_gem_object_get(obj); - list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list); - spin_unlock_irqrestore(&v3d->job_lock, irqflags); + list_add_tail(&bo->unref_head, &bin_job->render->unref_list); + spin_unlock_irqrestore(&queue->queue_lock, irqflags); v3d_mmu_flush_all(v3d); @@ -79,6 +83,20 @@ out: drm_gem_object_put(obj); } +static void +v3d_irq_signal_fence(struct v3d_dev *v3d, enum v3d_queue q, + void (*trace_irq)(struct drm_device *, uint64_t)) +{ + struct v3d_queue_state *queue = &v3d->queue[q]; + struct v3d_fence *fence = to_v3d_fence(queue->active_job->irq_fence); + + v3d_job_update_stats(queue->active_job, q); + trace_irq(&v3d->drm, fence->seqno); + + queue->active_job = NULL; + dma_fence_signal(&fence->base); +} + static irqreturn_t v3d_irq(int irq, void *arg) { @@ -102,41 +120,17 @@ v3d_irq(int irq, void *arg) } if (intsts & V3D_INT_FLDONE) { - struct v3d_fence *fence = - to_v3d_fence(v3d->bin_job->base.irq_fence); - - v3d_job_update_stats(&v3d->bin_job->base, V3D_BIN); - trace_v3d_bcl_irq(&v3d->drm, fence->seqno); - - v3d->bin_job = NULL; - dma_fence_signal(&fence->base); - + v3d_irq_signal_fence(v3d, V3D_BIN, trace_v3d_bcl_irq); status = IRQ_HANDLED; } if (intsts & V3D_INT_FRDONE) { - struct v3d_fence *fence = - to_v3d_fence(v3d->render_job->base.irq_fence); - - v3d_job_update_stats(&v3d->render_job->base, V3D_RENDER); - trace_v3d_rcl_irq(&v3d->drm, fence->seqno); - - v3d->render_job = NULL; - dma_fence_signal(&fence->base); - + v3d_irq_signal_fence(v3d, V3D_RENDER, trace_v3d_rcl_irq); status = IRQ_HANDLED; } if (intsts & V3D_INT_CSDDONE(v3d->ver)) { - struct v3d_fence *fence = - to_v3d_fence(v3d->csd_job->base.irq_fence); - - v3d_job_update_stats(&v3d->csd_job->base, V3D_CSD); - trace_v3d_csd_irq(&v3d->drm, fence->seqno); - - v3d->csd_job = NULL; - dma_fence_signal(&fence->base); - + v3d_irq_signal_fence(v3d, V3D_CSD, trace_v3d_csd_irq); status = IRQ_HANDLED; } @@ -168,15 +162,7 @@ v3d_hub_irq(int irq, void *arg) V3D_WRITE(V3D_HUB_INT_CLR, intsts); if (intsts & V3D_HUB_INT_TFUC) { - struct v3d_fence *fence = - to_v3d_fence(v3d->tfu_job->base.irq_fence); - - v3d_job_update_stats(&v3d->tfu_job->base, V3D_TFU); - trace_v3d_tfu_irq(&v3d->drm, fence->seqno); - - v3d->tfu_job = NULL; - dma_fence_signal(&fence->base); - + v3d_irq_signal_fence(v3d, V3D_TFU, 
trace_v3d_tfu_irq); status = IRQ_HANDLED; } diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index f9d9a198d718..0ec06bfbbebb 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -139,7 +139,7 @@ static void v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue) { struct v3d_dev *v3d = job->v3d; - struct v3d_file_priv *file = job->file->driver_priv; + struct v3d_file_priv *file = job->file_priv; struct v3d_stats *global_stats = &v3d->queue[queue].stats; struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); @@ -194,11 +194,11 @@ v3d_stats_update(struct v3d_stats *stats, u64 now) } void -v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) +v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q) { struct v3d_dev *v3d = job->v3d; - struct v3d_file_priv *file = job->file->driver_priv; - struct v3d_stats *global_stats = &v3d->queue[queue].stats; + struct v3d_queue_state *queue = &v3d->queue[q]; + struct v3d_stats *global_stats = &queue->stats; u64 now = local_clock(); unsigned long flags; @@ -209,10 +209,10 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) preempt_disable(); /* Don't update the local stats if the file context has already closed */ - if (file) - v3d_stats_update(&file->stats[queue], now); - else - drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n"); + spin_lock(&queue->queue_lock); + if (job->file_priv) + v3d_stats_update(&job->file_priv->stats[q], now); + spin_unlock(&queue->queue_lock); v3d_stats_update(global_stats, now); @@ -226,27 +226,28 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) { struct v3d_bin_job *job = to_bin_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_queue_state *queue = &v3d->queue[V3D_BIN]; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; unsigned long irqflags; if (unlikely(job->base.base.s_fence->finished.error)) { - spin_lock_irqsave(&v3d->job_lock, irqflags); - v3d->bin_job = NULL; - spin_unlock_irqrestore(&v3d->job_lock, irqflags); + spin_lock_irqsave(&queue->queue_lock, irqflags); + queue->active_job = NULL; + spin_unlock_irqrestore(&queue->queue_lock, irqflags); return NULL; } /* Lock required around bin_job update vs * v3d_overflow_mem_work(). */ - spin_lock_irqsave(&v3d->job_lock, irqflags); - v3d->bin_job = job; + spin_lock_irqsave(&queue->queue_lock, irqflags); + queue->active_job = &job->base; /* Clear out the overflow allocation, so we don't * reuse the overflow attached to a previous job. */ V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); - spin_unlock_irqrestore(&v3d->job_lock, irqflags); + spin_unlock_irqrestore(&queue->queue_lock, irqflags); v3d_invalidate_caches(v3d); @@ -290,11 +291,11 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; if (unlikely(job->base.base.s_fence->finished.error)) { - v3d->render_job = NULL; + v3d->queue[V3D_RENDER].active_job = NULL; return NULL; } - v3d->render_job = job; + v3d->queue[V3D_RENDER].active_job = &job->base; /* Can we avoid this flush? 
We need to be careful of * scheduling, though -- imagine job0 rendering to texture and @@ -338,11 +339,11 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; if (unlikely(job->base.base.s_fence->finished.error)) { - v3d->tfu_job = NULL; + v3d->queue[V3D_TFU].active_job = NULL; return NULL; } - v3d->tfu_job = job; + v3d->queue[V3D_TFU].active_job = &job->base; fence = v3d_fence_create(v3d, V3D_TFU); if (IS_ERR(fence)) @@ -386,11 +387,11 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) int i, csd_cfg0_reg; if (unlikely(job->base.base.s_fence->finished.error)) { - v3d->csd_job = NULL; + v3d->queue[V3D_CSD].active_job = NULL; return NULL; } - v3d->csd_job = job; + v3d->queue[V3D_CSD].active_job = &job->base; v3d_invalidate_caches(v3d); @@ -574,7 +575,7 @@ static void v3d_reset_performance_queries(struct v3d_cpu_job *job) { struct v3d_performance_query_info *performance_query = &job->performance_query; - struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; + struct v3d_file_priv *v3d_priv = job->base.file_priv; struct v3d_dev *v3d = job->base.v3d; struct v3d_perfmon *perfmon; @@ -604,7 +605,7 @@ v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, { struct v3d_performance_query_info *performance_query = &job->performance_query; - struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; + struct v3d_file_priv *v3d_priv = job->base.file_priv; struct v3d_performance_query *perf_query = &performance_query->queries[query]; struct v3d_dev *v3d = job->base.v3d; @@ -700,6 +701,7 @@ v3d_cpu_job_run(struct drm_sched_job *sched_job) trace_v3d_cpu_job_end(&v3d->drm, job->job_type); v3d_job_update_stats(&job->base, V3D_CPU); + /* Synchronous operation, so no fence to wait on. */ return NULL; } @@ -715,21 +717,24 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job) v3d_job_update_stats(job, V3D_CACHE_CLEAN); + /* Synchronous operation, so no fence to wait on. */ return NULL; } static enum drm_gpu_sched_stat -v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) +v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job, + enum v3d_queue q) { struct v3d_job *job = to_v3d_job(sched_job); - struct v3d_file_priv *v3d_priv = job->file->driver_priv; - enum v3d_queue q; + struct v3d_file_priv *v3d_priv = job->file_priv; + unsigned long irqflags; + enum v3d_queue i; mutex_lock(&v3d->reset_lock); /* block scheduler */ - for (q = 0; q < V3D_MAX_QUEUES; q++) - drm_sched_stop(&v3d->queue[q].sched, sched_job); + for (i = 0; i < V3D_MAX_QUEUES; i++) + drm_sched_stop(&v3d->queue[i].sched, sched_job); if (sched_job) drm_sched_increase_karma(sched_job); @@ -738,15 +743,17 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) v3d_reset(v3d); v3d->reset_counter++; - v3d_priv->reset_counter++; + spin_lock_irqsave(&v3d->queue[q].queue_lock, irqflags); + if (v3d_priv) + v3d_priv->reset_counter++; + spin_unlock_irqrestore(&v3d->queue[q].queue_lock, irqflags); - for (q = 0; q < V3D_MAX_QUEUES; q++) - drm_sched_resubmit_jobs(&v3d->queue[q].sched); + for (i = 0; i < V3D_MAX_QUEUES; i++) + drm_sched_resubmit_jobs(&v3d->queue[i].sched); /* Unblock schedulers and restart their jobs. 
*/ - for (q = 0; q < V3D_MAX_QUEUES; q++) { - drm_sched_start(&v3d->queue[q].sched, 0); - } + for (i = 0; i < V3D_MAX_QUEUES; i++) + drm_sched_start(&v3d->queue[i].sched, 0); mutex_unlock(&v3d->reset_lock); @@ -774,7 +781,7 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, return DRM_GPU_SCHED_STAT_NO_HANG; } - return v3d_gpu_reset_for_timeout(v3d, sched_job); + return v3d_gpu_reset_for_timeout(v3d, sched_job, q); } static enum drm_gpu_sched_stat @@ -796,11 +803,11 @@ v3d_render_job_timedout(struct drm_sched_job *sched_job) } static enum drm_gpu_sched_stat -v3d_generic_job_timedout(struct drm_sched_job *sched_job) +v3d_tfu_job_timedout(struct drm_sched_job *sched_job) { struct v3d_job *job = to_v3d_job(sched_job); - return v3d_gpu_reset_for_timeout(job->v3d, sched_job); + return v3d_gpu_reset_for_timeout(job->v3d, sched_job, V3D_TFU); } static enum drm_gpu_sched_stat @@ -819,7 +826,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_NO_HANG; } - return v3d_gpu_reset_for_timeout(v3d, sched_job); + return v3d_gpu_reset_for_timeout(v3d, sched_job, V3D_CSD); } static const struct drm_sched_backend_ops v3d_bin_sched_ops = { @@ -836,7 +843,7 @@ static const struct drm_sched_backend_ops v3d_render_sched_ops = { static const struct drm_sched_backend_ops v3d_tfu_sched_ops = { .run_job = v3d_tfu_job_run, - .timedout_job = v3d_generic_job_timedout, + .timedout_job = v3d_tfu_job_timedout, .free_job = v3d_sched_job_free, }; @@ -848,13 +855,11 @@ static const struct drm_sched_backend_ops v3d_csd_sched_ops = { static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = { .run_job = v3d_cache_clean_job_run, - .timedout_job = v3d_generic_job_timedout, .free_job = v3d_sched_job_free }; static const struct drm_sched_backend_ops v3d_cpu_sched_ops = { .run_job = v3d_cpu_job_run, - .timedout_job = v3d_generic_job_timedout, .free_job = v3d_cpu_job_free }; diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 5171ffe9012d..f3652e90683c 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -166,7 +166,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, job->v3d = v3d; job->free = free; - job->file = file_priv; + job->file_priv = v3d_priv; ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], 1, v3d_priv, file_priv->client_id); diff --git a/drivers/gpu/drm/vkms/tests/vkms_config_test.c b/drivers/gpu/drm/vkms/tests/vkms_config_test.c index ff4566cf9925..b0d78a81d2df 100644 --- a/drivers/gpu/drm/vkms/tests/vkms_config_test.c +++ b/drivers/gpu/drm/vkms/tests/vkms_config_test.c @@ -200,6 +200,7 @@ static void vkms_config_test_get_planes(struct kunit *test) KUNIT_ASSERT_EQ(test, n_planes, 0); plane_cfg1 = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg1); vkms_config_for_each_plane(config, plane_cfg) { n_planes++; if (plane_cfg != plane_cfg1) @@ -209,6 +210,7 @@ static void vkms_config_test_get_planes(struct kunit *test) n_planes = 0; plane_cfg2 = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg2); vkms_config_for_each_plane(config, plane_cfg) { n_planes++; if (plane_cfg != plane_cfg1 && plane_cfg != plane_cfg2) @@ -242,6 +244,7 @@ static void vkms_config_test_get_crtcs(struct kunit *test) KUNIT_FAIL(test, "Unexpected CRTC"); crtc_cfg1 = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg1); KUNIT_ASSERT_EQ(test, vkms_config_get_num_crtcs(config), 1); 
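/*
 * Illustrative sketch (not part of the diff): the assertions added throughout
 * vkms_config_test.c all follow the same pattern -- every object returned by a
 * vkms_config_create_*() helper is checked with KUNIT_ASSERT_NOT_ERR_OR_NULL()
 * before it is used, since those helpers can fail on allocation. Assuming an
 * existing config, crtc_cfg and err, the idiom looks like:
 */
plane_cfg = vkms_config_create_plane(config);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg);
vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY);
err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg);
KUNIT_EXPECT_EQ(test, err, 0);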
vkms_config_for_each_crtc(config, crtc_cfg) { if (crtc_cfg != crtc_cfg1) @@ -249,6 +252,7 @@ static void vkms_config_test_get_crtcs(struct kunit *test) } crtc_cfg2 = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg2); KUNIT_ASSERT_EQ(test, vkms_config_get_num_crtcs(config), 2); vkms_config_for_each_crtc(config, crtc_cfg) { if (crtc_cfg != crtc_cfg1 && crtc_cfg != crtc_cfg2) @@ -280,6 +284,7 @@ static void vkms_config_test_get_encoders(struct kunit *test) KUNIT_ASSERT_EQ(test, n_encoders, 0); encoder_cfg1 = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg1); vkms_config_for_each_encoder(config, encoder_cfg) { n_encoders++; if (encoder_cfg != encoder_cfg1) @@ -289,6 +294,7 @@ static void vkms_config_test_get_encoders(struct kunit *test) n_encoders = 0; encoder_cfg2 = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg2); vkms_config_for_each_encoder(config, encoder_cfg) { n_encoders++; if (encoder_cfg != encoder_cfg1 && encoder_cfg != encoder_cfg2) @@ -324,6 +330,7 @@ static void vkms_config_test_get_connectors(struct kunit *test) KUNIT_ASSERT_EQ(test, n_connectors, 0); connector_cfg1 = vkms_config_create_connector(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, connector_cfg1); vkms_config_for_each_connector(config, connector_cfg) { n_connectors++; if (connector_cfg != connector_cfg1) @@ -333,6 +340,7 @@ static void vkms_config_test_get_connectors(struct kunit *test) n_connectors = 0; connector_cfg2 = vkms_config_create_connector(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, connector_cfg2); vkms_config_for_each_connector(config, connector_cfg) { n_connectors++; if (connector_cfg != connector_cfg1 && @@ -370,7 +378,7 @@ static void vkms_config_test_invalid_plane_number(struct kunit *test) /* Invalid: Too many planes */ for (n = 0; n <= 32; n++) - vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vkms_config_create_plane(config)); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); @@ -395,6 +403,7 @@ static void vkms_config_test_valid_plane_type(struct kunit *test) /* Invalid: No primary plane */ plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_OVERLAY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); @@ -402,11 +411,13 @@ static void vkms_config_test_valid_plane_type(struct kunit *test) /* Invalid: Multiple primary planes */ plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); @@ -419,11 +430,13 @@ static void vkms_config_test_valid_plane_type(struct kunit *test) /* Invalid: Multiple cursor planes */ plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_CURSOR); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_CURSOR); err 
= vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); @@ -437,12 +450,16 @@ static void vkms_config_test_valid_plane_type(struct kunit *test) /* Invalid: Second CRTC without primary plane */ crtc_cfg = vkms_config_create_crtc(config); encoder_cfg = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg); + err = vkms_config_encoder_attach_crtc(encoder_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); /* Valid: Second CRTC with a primary plane */ plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); @@ -486,7 +503,7 @@ static void vkms_config_test_invalid_crtc_number(struct kunit *test) /* Invalid: Too many CRTCs */ for (n = 0; n <= 32; n++) - vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vkms_config_create_crtc(config)); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); @@ -509,7 +526,7 @@ static void vkms_config_test_invalid_encoder_number(struct kunit *test) /* Invalid: Too many encoders */ for (n = 0; n <= 32; n++) - vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vkms_config_create_encoder(config)); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); @@ -531,12 +548,15 @@ static void vkms_config_test_valid_encoder_possible_crtcs(struct kunit *test) /* Invalid: Encoder without a possible CRTC */ encoder_cfg = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); /* Valid: Second CRTC with shared encoder */ crtc_cfg2 = vkms_config_create_crtc(config); - plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg2); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); + vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg2); KUNIT_EXPECT_EQ(test, err, 0); @@ -577,7 +597,7 @@ static void vkms_config_test_invalid_connector_number(struct kunit *test) /* Invalid: Too many connectors */ for (n = 0; n <= 32; n++) - connector_cfg = vkms_config_create_connector(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vkms_config_create_connector(config)); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); @@ -669,13 +689,19 @@ static void vkms_config_test_plane_attach_crtc(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, config); overlay_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, overlay_cfg); vkms_config_plane_set_type(overlay_cfg, DRM_PLANE_TYPE_OVERLAY); + primary_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, primary_cfg); vkms_config_plane_set_type(primary_cfg, DRM_PLANE_TYPE_PRIMARY); + cursor_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cursor_cfg); vkms_config_plane_set_type(cursor_cfg, DRM_PLANE_TYPE_CURSOR); crtc_cfg = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg); /* No primary or cursor planes */ KUNIT_EXPECT_NULL(test, vkms_config_crtc_primary_plane(config, crtc_cfg)); @@ -735,6 +761,11 @@ static void vkms_config_test_plane_get_possible_crtcs(struct kunit *test) crtc_cfg1 = vkms_config_create_crtc(config); crtc_cfg2 = vkms_config_create_crtc(config); + 
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg2); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg2); + /* No possible CRTCs */ vkms_config_plane_for_each_possible_crtc(plane_cfg1, idx, possible_crtc) KUNIT_FAIL(test, "Unexpected possible CRTC"); @@ -799,6 +830,11 @@ static void vkms_config_test_encoder_get_possible_crtcs(struct kunit *test) crtc_cfg1 = vkms_config_create_crtc(config); crtc_cfg2 = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg2); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg2); + /* No possible CRTCs */ vkms_config_encoder_for_each_possible_crtc(encoder_cfg1, idx, possible_crtc) KUNIT_FAIL(test, "Unexpected possible CRTC"); @@ -863,6 +899,11 @@ static void vkms_config_test_connector_get_possible_encoders(struct kunit *test) encoder_cfg1 = vkms_config_create_encoder(config); encoder_cfg2 = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, connector_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, connector_cfg2); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg2); + /* No possible encoders */ vkms_config_connector_for_each_possible_encoder(connector_cfg1, idx, possible_encoder) diff --git a/drivers/gpu/drm/vkms/tests/vkms_format_test.c b/drivers/gpu/drm/vkms/tests/vkms_format_test.c index 2e1daef94831..a7788fbc45dc 100644 --- a/drivers/gpu/drm/vkms/tests/vkms_format_test.c +++ b/drivers/gpu/drm/vkms/tests/vkms_format_test.c @@ -14,20 +14,20 @@ MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); /** - * struct pixel_yuv_u8 - Internal representation of a pixel color. - * @y: Luma value, stored in 8 bits, without padding, using + * struct pixel_yuv_u16 - Internal representation of a pixel color. + * @y: Luma value, stored in 16 bits, without padding, using * machine endianness - * @u: Blue difference chroma value, stored in 8 bits, without padding, using + * @u: Blue difference chroma value, stored in 16 bits, without padding, using * machine endianness - * @v: Red difference chroma value, stored in 8 bits, without padding, using + * @v: Red difference chroma value, stored in 16 bits, without padding, using * machine endianness */ -struct pixel_yuv_u8 { - u8 y, u, v; +struct pixel_yuv_u16 { + u16 y, u, v; }; /* - * struct yuv_u8_to_argb_u16_case - Reference values to test the color + * struct yuv_u16_to_argb_u16_case - Reference values to test the color * conversions in VKMS between YUV to ARGB * * @encoding: Encoding used to convert RGB to YUV @@ -39,13 +39,13 @@ struct pixel_yuv_u8 { * @format_pair.yuv: Same color as @format_pair.rgb, but converted to * YUV using @encoding and @range. 
*/ -struct yuv_u8_to_argb_u16_case { +struct yuv_u16_to_argb_u16_case { enum drm_color_encoding encoding; enum drm_color_range range; size_t n_colors; struct format_pair { char *name; - struct pixel_yuv_u8 yuv; + struct pixel_yuv_u16 yuv; struct pixel_argb_u16 argb; } colors[TEST_BUFF_SIZE]; }; @@ -57,14 +57,14 @@ struct yuv_u8_to_argb_u16_case { * For more information got to the docs: * https://colour.readthedocs.io/en/master/generated/colour.RGB_to_YCbCr.html */ -static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { +static struct yuv_u16_to_argb_u16_case yuv_u16_to_argb_u16_cases[] = { /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, * K=colour.WEIGHTS_YCBCR["ITU-R BT.601"], * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = False, * out_int = True) * @@ -76,13 +76,13 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_FULL_RANGE, .n_colors = 6, .colors = { - { "white", { 0xff, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x80, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x00, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x4c, 0x55, 0xff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0x96, 0x2c, 0x15 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x1d, 0xff, 0x6b }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xffff, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x8080, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x0000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x4c8b, 0x54ce, 0xffff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0x9645, 0x2b33, 0x14d1 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x1d2f, 0xffff, 0x6b2f }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, @@ -90,7 +90,7 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = True, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -101,13 +101,13 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_LIMITED_RANGE, .n_colors = 6, .colors = { - { "white", { 0xeb, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x7e, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x10, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x51, 0x5a, 0xf0 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0x91, 0x36, 0x22 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x29, 0xf0, 0x6e }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xeb00, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x7dee, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x1000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x517b, 0x5a34, 0xf000 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0x908e, 0x35cc, 0x2237 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x28f7, 0xf000, 0x6dc9 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, @@ -115,7 +115,7 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = False, * out_int = 
True) * Tests cases for color conversion generated by converting RGB @@ -126,21 +126,21 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_FULL_RANGE, .n_colors = 6, .colors = { - { "white", { 0xff, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x80, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x00, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x36, 0x63, 0xff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0xb6, 0x1e, 0x0c }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x12, 0xff, 0x74 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xffff, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x8080, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x0000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x366d, 0x62ac, 0xffff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0xb717, 0x1d55, 0x0bbd }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x127c, 0xffff, 0x7443 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, * K=colour.WEIGHTS_YCBCR["ITU-R BT.709"], * in_bits = 16, - * int_legal = False, + * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = True, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -151,13 +151,13 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_LIMITED_RANGE, .n_colors = 6, .colors = { - { "white", { 0xeb, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x7e, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x10, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x3f, 0x66, 0xf0 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0xad, 0x2a, 0x1a }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x20, 0xf0, 0x76 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xeb00, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x7dee, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x1000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x3e8f, 0x6656, 0xf000 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0xaca1, 0x29aa, 0x1a45 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x1fd0, 0xf000, 0x75bb }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, @@ -165,7 +165,7 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = False, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -176,13 +176,13 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_FULL_RANGE, .n_colors = 6, .colors = { - { "white", { 0xff, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x80, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x00, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x43, 0x5c, 0xff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0xad, 0x24, 0x0b }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x0f, 0xff, 0x76 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xffff, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x8080, 0x8000, 0x8000 }, { 0xffff, 
0x8080, 0x8080, 0x8080 }}, + { "black", { 0x0000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x4340, 0x5c41, 0xffff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0xad91, 0x23bf, 0x0a4c }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x0f2e, 0xffff, 0x75b5 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, @@ -190,7 +190,7 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = True, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -201,32 +201,30 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_LIMITED_RANGE, .n_colors = 6, .colors = { - { "white", { 0xeb, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x7e, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x10, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x4a, 0x61, 0xf0 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0xa4, 0x2f, 0x19 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x1d, 0xf0, 0x77 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xeb00, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x7dee, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x1000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x4988, 0x60b9, 0xf000 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0xa47b, 0x2f47, 0x1902 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x1cfd, 0xf000, 0x76fe }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, }; /* - * vkms_format_test_yuv_u8_to_argb_u16 - Testing the conversion between YUV + * vkms_format_test_yuv_u16_to_argb_u16 - Testing the conversion between YUV * colors to ARGB colors in VKMS * * This test will use the functions get_conversion_matrix_to_argb_u16 and - * argb_u16_from_yuv888 to convert YUV colors (stored in - * yuv_u8_to_argb_u16_cases) into ARGB colors. + * argb_u16_from_yuv161616 to convert YUV colors (stored in + * yuv_u16_to_argb_u16_cases) into ARGB colors. * * The conversion between YUV and RGB is not totally reversible, so there may be * some difference between the expected value and the result. - * In addition, there may be some rounding error as the input color is 8 bits - * and output color is 16 bits. 
*/ -static void vkms_format_test_yuv_u8_to_argb_u16(struct kunit *test) +static void vkms_format_test_yuv_u16_to_argb_u16(struct kunit *test) { - const struct yuv_u8_to_argb_u16_case *param = test->param_value; + const struct yuv_u16_to_argb_u16_case *param = test->param_value; struct pixel_argb_u16 argb; for (size_t i = 0; i < param->n_colors; i++) { @@ -236,7 +234,8 @@ static void vkms_format_test_yuv_u8_to_argb_u16(struct kunit *test) get_conversion_matrix_to_argb_u16 (DRM_FORMAT_NV12, param->encoding, param->range, &matrix); - argb = argb_u16_from_yuv888(color->yuv.y, color->yuv.u, color->yuv.v, &matrix); + argb = argb_u16_from_yuv161616(&matrix, color->yuv.y, color->yuv.u, + color->yuv.v); KUNIT_EXPECT_LE_MSG(test, abs_diff(argb.a, color->argb.a), 0x1ff, "On the A channel of the color %s expected 0x%04x, got 0x%04x", @@ -253,19 +252,19 @@ static void vkms_format_test_yuv_u8_to_argb_u16(struct kunit *test) } } -static void vkms_format_test_yuv_u8_to_argb_u16_case_desc(struct yuv_u8_to_argb_u16_case *t, - char *desc) +static void vkms_format_test_yuv_u16_to_argb_u16_case_desc(struct yuv_u16_to_argb_u16_case *t, + char *desc) { snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s - %s", drm_get_color_encoding_name(t->encoding), drm_get_color_range_name(t->range)); } -KUNIT_ARRAY_PARAM(yuv_u8_to_argb_u16, yuv_u8_to_argb_u16_cases, - vkms_format_test_yuv_u8_to_argb_u16_case_desc +KUNIT_ARRAY_PARAM(yuv_u16_to_argb_u16, yuv_u16_to_argb_u16_cases, + vkms_format_test_yuv_u16_to_argb_u16_case_desc ); static struct kunit_case vkms_format_test_cases[] = { - KUNIT_CASE_PARAM(vkms_format_test_yuv_u8_to_argb_u16, yuv_u8_to_argb_u16_gen_params), + KUNIT_CASE_PARAM(vkms_format_test_yuv_u16_to_argb_u16, yuv_u16_to_argb_u16_gen_params), {} }; diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c index 6d0227c6635a..dfb8e13cba87 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.c +++ b/drivers/gpu/drm/vkms/vkms_formats.c @@ -259,16 +259,27 @@ static struct pixel_argb_u16 argb_u16_from_grayu16(u16 gray) return argb_u16_from_u16161616(0xFFFF, gray, gray, gray); } -VISIBLE_IF_KUNIT struct pixel_argb_u16 argb_u16_from_yuv888(u8 y, u8 channel_1, u8 channel_2, - const struct conversion_matrix *matrix) +static struct pixel_argb_u16 argb_u16_from_BGR565(const __le16 *pixel) +{ + struct pixel_argb_u16 out_pixel; + + out_pixel = argb_u16_from_RGB565(pixel); + swap(out_pixel.r, out_pixel.b); + + return out_pixel; +} + +VISIBLE_IF_KUNIT +struct pixel_argb_u16 argb_u16_from_yuv161616(const struct conversion_matrix *matrix, + u16 y, u16 channel_1, u16 channel_2) { u16 r, g, b; s64 fp_y, fp_channel_1, fp_channel_2; s64 fp_r, fp_g, fp_b; - fp_y = drm_int2fixp(((int)y - matrix->y_offset) * 257); - fp_channel_1 = drm_int2fixp(((int)channel_1 - 128) * 257); - fp_channel_2 = drm_int2fixp(((int)channel_2 - 128) * 257); + fp_y = drm_int2fixp((int)y - matrix->y_offset * 257); + fp_channel_1 = drm_int2fixp((int)channel_1 - 128 * 257); + fp_channel_2 = drm_int2fixp((int)channel_2 - 128 * 257); fp_r = drm_fixp_mul(matrix->matrix[0][0], fp_y) + drm_fixp_mul(matrix->matrix[0][1], fp_channel_1) + @@ -290,7 +301,65 @@ VISIBLE_IF_KUNIT struct pixel_argb_u16 argb_u16_from_yuv888(u8 y, u8 channel_1, return argb_u16_from_u16161616(0xffff, r, g, b); } -EXPORT_SYMBOL_IF_KUNIT(argb_u16_from_yuv888); +EXPORT_SYMBOL_IF_KUNIT(argb_u16_from_yuv161616); + +/** + * READ_LINE() - Generic generator for a read_line function which can be used for format with one + * plane and a block_h == block_w == 1. 
+ * + * @function_name: Function name to generate + * @pixel_name: Temporary pixel name used in the @__VA_ARGS__ parameters + * @pixel_type: Used to specify the type you want to cast the pixel pointer to + * @callback: Callback to call for each pixel. This function should take @__VA_ARGS__ as parameters + * and return a pixel_argb_u16 + * __VA_ARGS__: Arguments to pass to the callback. You can use @pixel_name to access the current + * pixel. + */ +#define READ_LINE(function_name, pixel_name, pixel_type, callback, ...) \ +static void function_name(const struct vkms_plane_state *plane, int x_start, \ + int y_start, enum pixel_read_direction direction, int count, \ + struct pixel_argb_u16 out_pixel[]) \ +{ \ + struct pixel_argb_u16 *end = out_pixel + count; \ + int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); \ + u8 *src_pixels; \ + \ + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); \ + \ + while (out_pixel < end) { \ + pixel_type *(pixel_name) = (pixel_type *)src_pixels; \ + *out_pixel = (callback)(__VA_ARGS__); \ + out_pixel += 1; \ + src_pixels += step; \ + } \ +} + +/** + * READ_LINE_ARGB8888() - Generic generator for ARGB8888 formats. + * The pixel type used is u8, so pixel_name[0]..pixel_name[n] are the n components of the pixel. + * + * @function_name: Function name to generate + * @pixel_name: temporary pixel to use in @a, @r, @g and @b parameters + * @a: alpha value + * @r: red value + * @g: green value + * @b: blue value + */ +#define READ_LINE_ARGB8888(function_name, pixel_name, a, r, g, b) \ + READ_LINE(function_name, pixel_name, u8, argb_u16_from_u8888, a, r, g, b) +/** + * READ_LINE_le16161616() - Generic generator for ARGB16161616 formats. + * The pixel type used is u16, so pixel_name[0]..pixel_name[n] are the n components of the pixel. + * + * @function_name: Function name to generate + * @pixel_name: temporary pixel to use in @a, @r, @g and @b parameters + * @a: alpha value + * @r: red value + * @g: green value + * @b: blue value + */ +#define READ_LINE_le16161616(function_name, pixel_name, a, r, g, b) \ + READ_LINE(function_name, pixel_name, __le16, argb_u16_from_le16161616, a, r, g, b) /* * The following functions are read_line function for each pixel format supported by VKMS. 
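As an illustrative sketch (not part of the diff), instantiating READ_LINE_ARGB8888(XRGB8888_read_line, px, 0xFF, px[2], px[1], px[0]) expands through READ_LINE() above into roughly the following reader, equivalent to the open-coded function it replaces:

static void XRGB8888_read_line(const struct vkms_plane_state *plane, int x_start,
			       int y_start, enum pixel_read_direction direction, int count,
			       struct pixel_argb_u16 out_pixel[])
{
	struct pixel_argb_u16 *end = out_pixel + count;
	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
	u8 *src_pixels;

	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);

	while (out_pixel < end) {
		u8 *px = (u8 *)src_pixels;	/* pixel_type = u8, pixel_name = px */
		*out_pixel = argb_u16_from_u8888(0xFF, px[2], px[1], px[0]);	/* callback(__VA_ARGS__) */
		out_pixel += 1;
		src_pixels += step;
	}
}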
@@ -378,138 +447,27 @@ static void R4_read_line(const struct vkms_plane_state *plane, int x_start, Rx_read_line(plane, x_start, y_start, direction, count, out_pixel); } -static void R8_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - while (out_pixel < end) { - *out_pixel = argb_u16_from_gray8(*src_pixels); - src_pixels += step; - out_pixel += 1; - } -} - -static void ARGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, - enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - while (out_pixel < end) { - u8 *px = (u8 *)src_pixels; - *out_pixel = argb_u16_from_u8888(px[3], px[2], px[1], px[0]); - out_pixel += 1; - src_pixels += step; - } -} - -static void XRGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, - enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - while (out_pixel < end) { - u8 *px = (u8 *)src_pixels; - *out_pixel = argb_u16_from_u8888(255, px[2], px[1], px[0]); - out_pixel += 1; - src_pixels += step; - } -} - -static void ABGR8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, - enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - while (out_pixel < end) { - u8 *px = (u8 *)src_pixels; - /* Switch blue and red pixels. 
*/ - *out_pixel = argb_u16_from_u8888(px[3], px[0], px[1], px[2]); - out_pixel += 1; - src_pixels += step; - } -} - -static void ARGB16161616_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - while (out_pixel < end) { - u16 *px = (u16 *)src_pixels; - *out_pixel = argb_u16_from_u16161616(px[3], px[2], px[1], px[0]); - out_pixel += 1; - src_pixels += step; - } -} +READ_LINE_ARGB8888(XRGB8888_read_line, px, 0xFF, px[2], px[1], px[0]) +READ_LINE_ARGB8888(XBGR8888_read_line, px, 0xFF, px[0], px[1], px[2]) -static void XRGB16161616_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - while (out_pixel < end) { - __le16 *px = (__le16 *)src_pixels; - *out_pixel = argb_u16_from_le16161616(cpu_to_le16(0xFFFF), px[2], px[1], px[0]); - out_pixel += 1; - src_pixels += step; - } -} - -static void RGB565_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; +READ_LINE_ARGB8888(ARGB8888_read_line, px, px[3], px[2], px[1], px[0]) +READ_LINE_ARGB8888(ABGR8888_read_line, px, px[3], px[0], px[1], px[2]) +READ_LINE_ARGB8888(RGBA8888_read_line, px, px[0], px[3], px[2], px[1]) +READ_LINE_ARGB8888(BGRA8888_read_line, px, px[0], px[1], px[2], px[3]) - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); +READ_LINE_ARGB8888(RGB888_read_line, px, 0xFF, px[2], px[1], px[0]) +READ_LINE_ARGB8888(BGR888_read_line, px, 0xFF, px[0], px[1], px[2]) - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); +READ_LINE_le16161616(ARGB16161616_read_line, px, px[3], px[2], px[1], px[0]) +READ_LINE_le16161616(ABGR16161616_read_line, px, px[3], px[0], px[1], px[2]) +READ_LINE_le16161616(XRGB16161616_read_line, px, cpu_to_le16(0xFFFF), px[2], px[1], px[0]) +READ_LINE_le16161616(XBGR16161616_read_line, px, cpu_to_le16(0xFFFF), px[0], px[1], px[2]) - while (out_pixel < end) { - __le16 *px = (__le16 *)src_pixels; +READ_LINE(RGB565_read_line, px, __le16, argb_u16_from_RGB565, px) +READ_LINE(BGR565_read_line, px, __le16, argb_u16_from_BGR565, px) - *out_pixel = argb_u16_from_RGB565(px); - out_pixel += 1; - src_pixels += step; - } -} +READ_LINE(R8_read_line, px, u8, argb_u16_from_gray8, *px) /* * This callback can be used for YUV formats where U and V values are @@ -521,35 +479,57 @@ static void RGB565_read_line(const struct vkms_plane_state *plane, int x_start, * - Convert YUV and YVU with the same function (a column swap is needed when setting up * plane->conversion_matrix) */ -static void semi_planar_yuv_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - u8 *y_plane; - u8 *uv_plane; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 
0, - &y_plane); - packed_pixels_addr_1x1(plane->frame_info, - x_start / plane->frame_info->fb->format->hsub, - y_start / plane->frame_info->fb->format->vsub, 1, - &uv_plane); - int step_y = get_block_step_bytes(plane->frame_info->fb, direction, 0); - int step_uv = get_block_step_bytes(plane->frame_info->fb, direction, 1); - int subsampling = get_subsampling(plane->frame_info->fb->format, direction); - int subsampling_offset = get_subsampling_offset(direction, x_start, y_start); - const struct conversion_matrix *conversion_matrix = &plane->conversion_matrix; - - for (int i = 0; i < count; i++) { - *out_pixel = argb_u16_from_yuv888(y_plane[0], uv_plane[0], uv_plane[1], - conversion_matrix); - out_pixel += 1; - y_plane += step_y; - if ((i + subsampling_offset + 1) % subsampling == 0) - uv_plane += step_uv; - } -} +/** + * READ_LINE_YUV_SEMIPLANAR() - Generic generator for a read_line function which can be used for YUV + * formats with two planes and block_w == block_h == 1. + * + * @function_name: Function name to generate + * @pixel_1_name: temporary pixel name for the first plane used in the @__VA_ARGS__ parameters + * @pixel_2_name: temporary pixel name for the second plane used in the @__VA_ARGS__ parameters + * @pixel_1_type: Used to specify the type you want to cast the pixel pointer to on plane 1 + * @pixel_2_type: Used to specify the type you want to cast the pixel pointer to on plane 2 + * @callback: Callback to call for each pixel. This function should take + * (struct conversion_matrix*, @__VA_ARGS__) as parameters and return a pixel_argb_u16 + * __VA_ARGS__: Arguments to pass to the callback. You can use @pixel_1_name and @pixel_2_name + * to access the current pixel values + */ +#define READ_LINE_YUV_SEMIPLANAR(function_name, pixel_1_name, pixel_2_name, pixel_1_type, \ + pixel_2_type, callback, ...) \ +static void function_name(const struct vkms_plane_state *plane, int x_start, \ + int y_start, enum pixel_read_direction direction, int count, \ + struct pixel_argb_u16 out_pixel[]) \ +{ \ + u8 *plane_1; \ + u8 *plane_2; \ + \ + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, \ + &plane_1); \ + packed_pixels_addr_1x1(plane->frame_info, \ + x_start / plane->frame_info->fb->format->hsub, \ + y_start / plane->frame_info->fb->format->vsub, 1, \ + &plane_2); \ + int step_1 = get_block_step_bytes(plane->frame_info->fb, direction, 0); \ + int step_2 = get_block_step_bytes(plane->frame_info->fb, direction, 1); \ + int subsampling = get_subsampling(plane->frame_info->fb->format, direction); \ + int subsampling_offset = get_subsampling_offset(direction, x_start, y_start); \ + const struct conversion_matrix *conversion_matrix = &plane->conversion_matrix; \ + \ + for (int i = 0; i < count; i++) { \ + pixel_1_type *(pixel_1_name) = (pixel_1_type *)plane_1; \ + pixel_2_type *(pixel_2_name) = (pixel_2_type *)plane_2; \ + *out_pixel = (callback)(conversion_matrix, __VA_ARGS__); \ + out_pixel += 1; \ + plane_1 += step_1; \ + if ((i + subsampling_offset + 1) % subsampling == 0) \ + plane_2 += step_2; \ + } \ +} + +READ_LINE_YUV_SEMIPLANAR(YUV888_semiplanar_read_line, y, uv, u8, u8, argb_u16_from_yuv161616, + y[0] * 257, uv[0] * 257, uv[1] * 257) +READ_LINE_YUV_SEMIPLANAR(YUV161616_semiplanar_read_line, y, uv, u16, u16, argb_u16_from_yuv161616, + y[0], uv[0], uv[1]) /* * This callback can be used for YUV format where each color component is * stored in a different plane (often called planar formats). 
It will @@ -586,8 +566,9 @@ static void planar_yuv_read_line(const struct vkms_plane_state *plane, int x_sta const struct conversion_matrix *conversion_matrix = &plane->conversion_matrix; for (int i = 0; i < count; i++) { - *out_pixel = argb_u16_from_yuv888(*y_plane, *channel_1_plane, *channel_2_plane, - conversion_matrix); + *out_pixel = argb_u16_from_yuv161616(conversion_matrix, + *y_plane * 257, *channel_1_plane * 257, + *channel_2_plane * 257); out_pixel += 1; y_plane += step_y; if ((i + subsampling_offset + 1) % subsampling == 0) { @@ -712,23 +693,43 @@ pixel_read_line_t get_pixel_read_line_function(u32 format) switch (format) { case DRM_FORMAT_ARGB8888: return &ARGB8888_read_line; - case DRM_FORMAT_XRGB8888: - return &XRGB8888_read_line; case DRM_FORMAT_ABGR8888: return &ABGR8888_read_line; + case DRM_FORMAT_BGRA8888: + return &BGRA8888_read_line; + case DRM_FORMAT_RGBA8888: + return &RGBA8888_read_line; + case DRM_FORMAT_XRGB8888: + return &XRGB8888_read_line; + case DRM_FORMAT_XBGR8888: + return &XBGR8888_read_line; + case DRM_FORMAT_RGB888: + return &RGB888_read_line; + case DRM_FORMAT_BGR888: + return &BGR888_read_line; case DRM_FORMAT_ARGB16161616: return &ARGB16161616_read_line; + case DRM_FORMAT_ABGR16161616: + return &ABGR16161616_read_line; case DRM_FORMAT_XRGB16161616: return &XRGB16161616_read_line; + case DRM_FORMAT_XBGR16161616: + return &XBGR16161616_read_line; case DRM_FORMAT_RGB565: return &RGB565_read_line; + case DRM_FORMAT_BGR565: + return &BGR565_read_line; case DRM_FORMAT_NV12: case DRM_FORMAT_NV16: case DRM_FORMAT_NV24: case DRM_FORMAT_NV21: case DRM_FORMAT_NV61: case DRM_FORMAT_NV42: - return &semi_planar_yuv_read_line; + return &YUV888_semiplanar_read_line; + case DRM_FORMAT_P010: + case DRM_FORMAT_P012: + case DRM_FORMAT_P016: + return &YUV161616_semiplanar_read_line; case DRM_FORMAT_YUV420: case DRM_FORMAT_YUV422: case DRM_FORMAT_YUV444: diff --git a/drivers/gpu/drm/vkms/vkms_formats.h b/drivers/gpu/drm/vkms/vkms_formats.h index b4fe62ab9c65..eeb208cdd6b1 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.h +++ b/drivers/gpu/drm/vkms/vkms_formats.h @@ -14,8 +14,8 @@ void get_conversion_matrix_to_argb_u16(u32 format, enum drm_color_encoding encod struct conversion_matrix *matrix); #if IS_ENABLED(CONFIG_KUNIT) -struct pixel_argb_u16 argb_u16_from_yuv888(u8 y, u8 channel_1, u8 channel_2, - const struct conversion_matrix *matrix); +struct pixel_argb_u16 argb_u16_from_yuv161616(const struct conversion_matrix *matrix, + u16 y, u16 channel_1, u16 channel_2); #endif #endif /* _VKMS_FORMATS_H_ */ diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c index e3fdd161d0f0..e592e47a5736 100644 --- a/drivers/gpu/drm/vkms/vkms_plane.c +++ b/drivers/gpu/drm/vkms/vkms_plane.c @@ -14,11 +14,19 @@ static const u32 vkms_formats[] = { DRM_FORMAT_ARGB8888, - DRM_FORMAT_XRGB8888, DRM_FORMAT_ABGR8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_RGB888, + DRM_FORMAT_BGR888, DRM_FORMAT_XRGB16161616, + DRM_FORMAT_XBGR16161616, DRM_FORMAT_ARGB16161616, + DRM_FORMAT_ABGR16161616, DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV24, @@ -31,6 +39,9 @@ static const u32 vkms_formats[] = { DRM_FORMAT_YVU420, DRM_FORMAT_YVU422, DRM_FORMAT_YVU444, + DRM_FORMAT_P010, + DRM_FORMAT_P012, + DRM_FORMAT_P016, DRM_FORMAT_R1, DRM_FORMAT_R2, DRM_FORMAT_R4, diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 07c71a29963d..987e4fe10538 100644 --- 
a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -35,6 +35,7 @@ $(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ + xe_dep_scheduler.o \ xe_devcoredump.o \ xe_device.o \ xe_device_sysfs.o \ @@ -60,7 +61,6 @@ xe-y += xe_bb.o \ xe_gt_pagefault.o \ xe_gt_sysfs.o \ xe_gt_throttle.o \ - xe_gt_tlb_invalidation.o \ xe_gt_topology.o \ xe_guc.o \ xe_guc_ads.o \ @@ -75,16 +75,19 @@ xe-y += xe_bb.o \ xe_guc_log.o \ xe_guc_pc.o \ xe_guc_submit.o \ + xe_guc_tlb_inval.o \ xe_heci_gsc.o \ xe_huc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ xe_hw_engine_group.o \ + xe_hw_error.o \ xe_hw_fence.o \ xe_irq.o \ xe_lrc.o \ xe_migrate.o \ xe_mmio.o \ + xe_mmio_gem.o \ xe_mocs.o \ xe_module.o \ xe_nvm.o \ @@ -95,6 +98,7 @@ xe-y += xe_bb.o \ xe_pcode.o \ xe_pm.o \ xe_preempt_fence.o \ + xe_psmi.o \ xe_pt.o \ xe_pt_walk.o \ xe_pxp.o \ @@ -114,6 +118,8 @@ xe-y += xe_bb.o \ xe_sync.o \ xe_tile.o \ xe_tile_sysfs.o \ + xe_tlb_inval.o \ + xe_tlb_inval_job.o \ xe_trace.o \ xe_trace_bo.o \ xe_trace_guc.o \ @@ -125,6 +131,7 @@ xe-y += xe_bb.o \ xe_uc.o \ xe_uc_fw.o \ xe_vm.o \ + xe_vm_madvise.o \ xe_vram.o \ xe_vram_freq.o \ xe_vsec.o \ @@ -149,6 +156,7 @@ xe-y += \ xe_memirq.o \ xe_sriov.o \ xe_sriov_vf.o \ + xe_sriov_vf_ccs.o \ xe_tile_sriov_vf.o xe-$(CONFIG_PCI_IOV) += \ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 81eb046aeebf..d8cf68a0516d 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -193,6 +193,14 @@ enum xe_guc_register_context_multi_lrc_param_offsets { XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11, }; +enum xe_guc_context_wq_item_offsets { + XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN = 0, + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW, + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS, + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID, + XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL, +}; + enum xe_guc_report_status { XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, XE_GUC_REPORT_STATUS_ACKED = 0x1, diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h index ecf748fd87df..ad76b4baf42e 100644 --- a/drivers/gpu/drm/xe/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -63,6 +63,7 @@ enum xe_guc_load_status { XE_GUC_LOAD_STATUS_HWCONFIG_START = 0x05, XE_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06, XE_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07, + XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH = 0x08, XE_GUC_LOAD_STATUS_GDT_DONE = 0x10, XE_GUC_LOAD_STATUS_IDT_DONE = 0x20, XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, @@ -75,6 +76,8 @@ enum xe_guc_load_status { XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR = 0x75, + XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG = 0x76, XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, XE_GUC_LOAD_STATUS_READY = 0xF0, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 0366a9da5977..0e78351c6ef5 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -390,12 +390,14 @@ enum { */ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT = 0x9004, GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, 
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b, + GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG = 0x900c, }; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 9b7572e06f34..b8269391bc69 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -12,7 +12,6 @@ #include <drm/drm_drv.h> -#include "i915_utils.h" #include "xe_device.h" /* for xe_device_has_flat_ccs() */ #include "xe_device_types.h" @@ -26,34 +25,13 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) #define IS_I915G(dev_priv) (dev_priv && 0) #define IS_I915GM(dev_priv) (dev_priv && 0) #define IS_PINEVIEW(dev_priv) (dev_priv && 0) -#define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) #define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) #define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) #define IS_HASWELL(dev_priv) (dev_priv && 0) #define IS_BROADWELL(dev_priv) (dev_priv && 0) -#define IS_SKYLAKE(dev_priv) (dev_priv && 0) #define IS_BROXTON(dev_priv) (dev_priv && 0) -#define IS_KABYLAKE(dev_priv) (dev_priv && 0) #define IS_GEMINILAKE(dev_priv) (dev_priv && 0) -#define IS_COFFEELAKE(dev_priv) (dev_priv && 0) -#define IS_COMETLAKE(dev_priv) (dev_priv && 0) -#define IS_ICELAKE(dev_priv) (dev_priv && 0) -#define IS_JASPERLAKE(dev_priv) (dev_priv && 0) -#define IS_ELKHARTLAKE(dev_priv) (dev_priv && 0) -#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_TIGERLAKE) -#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) -#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) -#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ - IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) -#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) -#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) -#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE) -#define IS_PANTHERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_PANTHERLAKE) - -#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) #define IS_MOBILE(xe) (xe && 0) diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c index 43b10a2cc508..1421c2a7b64d 100644 --- a/drivers/gpu/drm/xe/display/ext/i915_utils.c +++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c @@ -4,6 +4,7 @@ */ #include "i915_drv.h" +#include "i915_utils.h" bool i915_vtd_active(struct drm_i915_private *i915) { diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index fba9617a75a5..d96ba2b51065 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -41,7 +41,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); - if (!IS_DGFX(xe) && !XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) { + if (!IS_DGFX(xe) && !XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) { obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size, ttm_bo_type_kernel, 
XE_BO_FLAG_SCANOUT | diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index e2e0771cf274..8b68d70db6c8 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -96,6 +96,7 @@ static void xe_display_fini_early(void *arg) if (!xe->info.probe_display) return; + intel_hpd_cancel_work(display); intel_display_driver_remove_nogem(display); intel_display_driver_remove_noirq(display); intel_opregion_cleanup(display); @@ -340,6 +341,8 @@ void xe_display_pm_suspend(struct xe_device *xe) xe_display_flush_cleanup_work(xe); + intel_encoder_block_all_hpds(display); + intel_hpd_cancel_work(display); if (has_display(xe)) { @@ -370,6 +373,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) xe_display_flush_cleanup_work(xe); intel_dp_mst_suspend(display); + intel_encoder_block_all_hpds(display); intel_hpd_cancel_work(display); if (has_display(xe)) @@ -471,6 +475,8 @@ void xe_display_pm_resume(struct xe_device *xe) intel_hpd_init(display); + intel_encoder_unblock_all_hpds(display); + if (has_display(xe)) { intel_display_driver_resume(display); drm_kms_helper_poll_enable(&xe->drm); diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c index 68d1387d81a0..8ada1cbcb16c 100644 --- a/drivers/gpu/drm/xe/display/xe_display_wa.c +++ b/drivers/gpu/drm/xe/display/xe_display_wa.c @@ -14,5 +14,5 @@ bool intel_display_needs_wa_16023588340(struct intel_display *display) { struct xe_device *xe = to_xe_device(display->drm); - return XE_WA(xe_root_mmio_gt(xe), 16023588340); + return XE_GT_WA(xe_root_mmio_gt(xe), 16023588340); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index c38fba18effe..f1f8b5ab53ef 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -16,6 +16,7 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_pm.h" +#include "xe_vram_types.h" static void write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs, @@ -289,7 +290,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) { - struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_vram_region *vram = xe_device_get_root_tile(xe)->mem.vram; /* * If we need to able to access the clear-color value stored in @@ -297,7 +298,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, * accessible. This is important on small-bar systems where * only some subset of VRAM is CPU accessible. 
*/ - if (tile->mem.vram.io_size < tile->mem.vram.usable_size) { + if (xe_vram_region_io_size(vram) < xe_vram_region_usable_size(vram)) { ret = -EINVAL; goto err; } @@ -382,6 +383,7 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, const struct intel_plane_state *old_plane_state) { struct intel_framebuffer *fb = to_intel_framebuffer(new_plane_state->hw.fb); + struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane); struct xe_device *xe = to_xe_device(fb->base.dev); struct intel_display *display = xe->display; struct i915_vma *vma; @@ -405,6 +407,10 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, found: refcount_inc(&vma->ref); new_plane_state->ggtt_vma = vma; + + new_plane_state->surf = i915_ggtt_offset(new_plane_state->ggtt_vma) + + plane->surf_offset(new_plane_state); + return true; } @@ -431,6 +437,10 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, return PTR_ERR(vma); new_plane_state->ggtt_vma = vma; + + new_plane_state->surf = i915_ggtt_offset(new_plane_state->ggtt_vma) + + plane->surf_offset(new_plane_state); + return 0; } diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index dcbc4b2d3fd9..826ac3d578b7 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -10,6 +10,7 @@ #include "xe_ggtt.h" #include "xe_mmio.h" +#include "i915_vma.h" #include "intel_crtc.h" #include "intel_display.h" #include "intel_display_core.h" @@ -21,6 +22,7 @@ #include "intel_plane.h" #include "intel_plane_initial.h" #include "xe_bo.h" +#include "xe_vram_types.h" #include "xe_wa.h" #include <generated/xe_wa_oob.h> @@ -103,7 +105,7 @@ initial_plane_bo(struct xe_device *xe, * We don't currently expect this to ever be placed in the * stolen portion. 
*/ - if (phys_base >= tile0->mem.vram.usable_size) { + if (phys_base >= xe_vram_region_usable_size(tile0->mem.vram)) { drm_err(&xe->drm, "Initial plane programming using invalid range, phys_base=%pa\n", &phys_base); @@ -121,7 +123,7 @@ initial_plane_bo(struct xe_device *xe, phys_base = base; flags |= XE_BO_FLAG_STOLEN; - if (XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) + if (XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) return NULL; /* @@ -234,6 +236,9 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, goto nofb; plane_state->ggtt_vma = vma; + + plane_state->surf = i915_ggtt_offset(plane_state->ggtt_vma); + plane_state->uapi.src_x = 0; plane_state->uapi.src_y = 0; plane_state->uapi.src_w = fb->width << 16; diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index e3f5e8bb3ebc..c47b290e0e9f 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -65,6 +65,7 @@ #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) #define MI_LRM_USE_GGTT REG_BIT(22) +#define MI_LRM_ASYNC REG_BIT(21) #define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3)) #define MI_LRR_DST_CS_MMIO REG_BIT(19) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7ade41e2b7b3..f4c3e1187a00 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -111,6 +111,9 @@ #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) #define CS_PRIORITY_MEM_READ REG_BIT(7) +#define CS_DEBUG_MODE2(base) XE_REG((base) + 0xd8, XE_REG_OPTION_MASKED) +#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6) + #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 9b66cc972a63..180be82672ab 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -13,6 +13,8 @@ /* Definitions of GSC H/W registers, bits, etc */ +#define BMG_GSC_HECI1_BASE 0x373000 + #define MTL_GSC_HECI1_BASE 0x00116000 #define MTL_GSC_HECI2_BASE 0x00117000 diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5cd5ab8529c5..f96b2e2b3064 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -42,7 +42,7 @@ #define FORCEWAKE_ACK_GSC XE_REG(0xdf8) #define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) -#define MCFG_MCR_SELECTOR XE_REG(0xfd0) +#define STEER_SEMAPHORE XE_REG(0xfd0) #define MTL_MCR_SELECTOR XE_REG(0xfd4) #define SF_MCR_SELECTOR XE_REG(0xfd8) #define MCR_SELECTOR XE_REG(0xfdc) diff --git a/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h new file mode 100644 index 000000000000..c146b9ef44eb --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_HW_ERROR_REGS_H_ +#define _XE_HW_ERROR_REGS_H_ + +#define HEC_UNCORR_ERR_STATUS(base) XE_REG((base) + 0x118) +#define UNCORR_FW_REPORTED_ERR BIT(6) + +#define HEC_UNCORR_FW_ERR_DW0(base) XE_REG((base) + 0x124) + +#define DEV_ERR_STAT_NONFATAL 0x100178 +#define DEV_ERR_STAT_CORRECTABLE 0x10017c +#define DEV_ERR_STAT_REG(x) XE_REG(_PICK_EVEN((x), \ + DEV_ERR_STAT_CORRECTABLE, \ + DEV_ERR_STAT_NONFATAL)) +#define XE_CSC_ERROR BIT(17) +#endif diff 
--git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index 13635e4331d4..7c2a3a140142 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -18,6 +18,7 @@ #define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF) #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) +#define ERROR_IRQ(x) REG_BIT(26 + (x)) #define DISPLAY_IRQ REG_BIT(16) #define I2C_IRQ REG_BIT(12) #define GT_DW_IRQ(x) REG_BIT(x) diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index 2995d72c3f78..264e9baf949c 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -21,4 +21,14 @@ #define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) #define SG_REMAP_BITS REG_GENMASK(31, 24) +#define BMG_MODS_RESIDENCY_OFFSET (0x4D0) +#define BMG_G2_RESIDENCY_OFFSET (0x530) +#define BMG_G6_RESIDENCY_OFFSET (0x538) +#define BMG_G8_RESIDENCY_OFFSET (0x540) +#define BMG_G10_RESIDENCY_OFFSET (0x548) + +#define BMG_PCIE_LINK_L0_RESIDENCY_OFFSET (0x570) +#define BMG_PCIE_LINK_L1_RESIDENCY_OFFSET (0x578) +#define BMG_PCIE_LINK_L1_2_RESIDENCY_OFFSET (0x580) + #endif diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index bb469096d072..7b40cc8be1c9 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -236,7 +236,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc } xe_bo_lock(external, false); - err = xe_bo_pin_external(external); + err = xe_bo_pin_external(external, false); xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo pin err=%pe\n", diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index c53f67ce4b0a..5baeab6b6fb7 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -57,16 +57,12 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, return; /* - * Evict exporter. Note that the gem object dma_buf member isn't - * set from xe_gem_prime_export(), and it's needed for the move_notify() - * functionality, so hack that up here. Evicting the exported bo will + * Evict exporter. Evicting the exported bo will * evict also the imported bo through the move_notify() functionality if * importer is on a different device. If they're on the same device, * the exporter and the importer should be the same bo. */ - swap(exported->ttm.base.dma_buf, dmabuf); ret = xe_bo_evict(exported); - swap(exported->ttm.base.dma_buf, dmabuf); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", @@ -89,15 +85,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, return; } - /* - * If on different devices, the exporter is kept in system if - * possible, saving a migration step as the transfer is just - * likely as fast from system memory. 
- */ - if (params->mem_mask & XE_BO_FLAG_SYSTEM) - KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); - else - KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); if (params->force_different_devices) KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); @@ -139,6 +127,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) PTR_ERR(dmabuf)); goto out; } + bo->ttm.base.dma_buf = dmabuf; import = xe_gem_prime_import(&xe->drm, dmabuf); if (!IS_ERR(import)) { @@ -186,6 +175,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", PTR_ERR(import)); } + bo->ttm.base.dma_buf = NULL; dma_buf_put(dmabuf); out: drm_gem_object_put(&bo->ttm.base); @@ -206,7 +196,7 @@ static const struct dma_buf_attach_ops nop2p_attach_ops = { static const struct dma_buf_test_params test_params[] = { {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_FLAG_VRAM0, + {.mem_mask = XE_BO_FLAG_VRAM0 | XE_BO_FLAG_NEEDS_CPU_ACCESS, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, @@ -238,7 +228,8 @@ static const struct dma_buf_test_params test_params[] = { {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_NEEDS_CPU_ACCESS, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 9c715e59f030..db30c5156d0c 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -101,6 +101,11 @@ static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, } } +static void fake_xe_info_probe_tile_count(struct xe_device *xe) +{ + /* Nothing to do, just use the statically defined value. 
*/ +} + int xe_pci_fake_device_init(struct xe_device *xe) { struct kunit *test = kunit_get_current_test(); @@ -138,6 +143,8 @@ done: data->sriov_mode : XE_SRIOV_MODE_NONE; kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid); + kunit_activate_static_stub(test, xe_info_probe_tile_count, + fake_xe_info_probe_tile_count); xe_info_init_early(xe, desc, subplatform_desc); xe_info_init(xe, desc); diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index c96d1fe34151..416258c193f6 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -75,6 +75,7 @@ static const struct platform_test_case cases[] = { GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), + GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0), }; static void platform_desc(const struct platform_test_case *t, char *desc) diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 68fe70ce2be3..a818eaa05b7d 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -12,6 +12,7 @@ #include "xe_gt_types.h" #include "xe_step.h" +#include "xe_vram.h" /** * DOC: Xe Asserts @@ -145,7 +146,8 @@ const struct xe_tile *__tile = (tile); \ char __buf[10] __maybe_unused; \ xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \ - __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \ + __tile->id, ({ string_get_size( \ + xe_vram_region_actual_physical_size(__tile->mem.vram), 1, \ STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg); \ }) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 5ce0e26822f2..feb6e013dc38 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -60,6 +60,41 @@ err: return ERR_PTR(err); } +struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, + enum xe_sriov_vf_ccs_rw_ctxs ctx_id) +{ + struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_sa_manager *bb_pool; + int err; + + if (!bb) + return ERR_PTR(-ENOMEM); + /* + * We need to allocate space for the requested number of dwords & + * one additional MI_BATCH_BUFFER_END dword. Since the whole SA + * is submitted to HW, we need to make sure that the last instruction + * is not over written when the last chunk of SA is allocated for BB. + * So, this extra DW acts as a guard here. 
+ */ + + bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool; + bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1)); + + if (IS_ERR(bb->bo)) { + err = PTR_ERR(bb->bo); + goto err; + } + + bb->cs = xe_sa_bo_cpu_addr(bb->bo); + bb->len = 0; + + return bb; +err: + kfree(bb); + return ERR_PTR(err); +} + static struct xe_sched_job * __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index b5cc65506696..2a8adc9a6dee 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -13,8 +13,11 @@ struct dma_fence; struct xe_gt; struct xe_exec_queue; struct xe_sched_job; +enum xe_sriov_vf_ccs_rw_ctxs; struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); +struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, + enum xe_sriov_vf_ccs_rw_ctxs ctx_id); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 18f27da47a36..870f43347281 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -33,9 +33,11 @@ #include "xe_pxp.h" #include "xe_res_cursor.h" #include "xe_shrinker.h" +#include "xe_sriov_vf_ccs.h" #include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +#include "xe_vram_types.h" const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { [XE_PL_SYSTEM] = "system", @@ -186,6 +188,8 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_TT, + .flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ? + TTM_PL_FLAG_FALLBACK : 0, }; *c += 1; } @@ -198,6 +202,8 @@ static bool force_contiguous(u32 bo_flags) else if (bo_flags & XE_BO_FLAG_PINNED && !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) return true; /* needs vmap */ + else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR) + return true; /* * For eviction / restore on suspend / resume objects pinned in VRAM @@ -812,14 +818,14 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } if (ttm_bo->type == ttm_bo_type_sg) { - ret = xe_bo_move_notify(bo, ctx); + if (new_mem->mem_type == XE_PL_SYSTEM) + ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); return ret; } - tt_has_data = ttm && (ttm_tt_is_populated(ttm) || - (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); + tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm)); move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) : (!mem_type_is_vram(old_mem_type) && !tt_has_data)); @@ -964,6 +970,20 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, dma_fence_put(fence); xe_pm_runtime_put(xe); + /* + * CCS meta data is migrated from TT -> SMEM. So, let us detach the + * BBs from BO as it is no longer needed. 
+ */ + if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT && + new_mem->mem_type == XE_PL_SYSTEM) + xe_sriov_vf_ccs_detach_bo(bo); + + if (IS_SRIOV_VF(xe) && + ((move_lacks_source && new_mem->mem_type == XE_PL_TT) || + (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) && + handle_system_ccs) + ret = xe_sriov_vf_ccs_attach_bo(bo); + out: if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && ttm_bo->ttm) { @@ -974,6 +994,9 @@ out: if (timeout < 0) ret = timeout; + if (IS_VF_CCS_BB_VALID(xe, bo)) + xe_sriov_vf_ccs_detach_bo(bo); + xe_tt_unmap_sg(xe, ttm_bo->ttm); } @@ -1501,9 +1524,14 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) { + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + if (!xe_bo_is_xe_bo(ttm_bo)) return; + if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo)) + xe_sriov_vf_ccs_detach_bo(bo); + /* * Object is idle and about to be destroyed. Release the * dma-buf attachment. @@ -1685,6 +1713,18 @@ static void xe_gem_object_close(struct drm_gem_object *obj, } } +static bool should_migrate_to_smem(struct xe_bo *bo) +{ + /* + * NOTE: The following atomic checks are platform-specific. For example, + * if a device supports CXL atomics, these may not be necessary or + * may behave differently. + */ + + return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL || + bo->attr.atomic_access == DRM_XE_ATOMIC_CPU; +} + static vm_fault_t xe_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; @@ -1693,7 +1733,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) struct xe_bo *bo = ttm_to_xe_bo(tbo); bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK; vm_fault_t ret; - int idx; + int idx, r = 0; if (needs_rpm) xe_pm_runtime_get(xe); @@ -1705,25 +1745,40 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) if (drm_dev_enter(ddev, &idx)) { trace_xe_bo_cpu_fault(bo); - ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); + if (should_migrate_to_smem(bo)) { + xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM); + + r = xe_bo_migrate(bo, XE_PL_TT); + if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) + ret = VM_FAULT_NOPAGE; + else if (r) + ret = VM_FAULT_SIGBUS; + } + if (!ret) + ret = ttm_bo_vm_fault_reserved(vmf, + vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); drm_dev_exit(idx); + + if (ret == VM_FAULT_RETRY && + !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + goto out; + + /* + * ttm_bo_vm_reserve() already has dma_resv_lock. + */ + if (ret == VM_FAULT_NOPAGE && + mem_type_is_vram(tbo->resource->mem_type)) { + mutex_lock(&xe->mem_access.vram_userfault.lock); + if (list_empty(&bo->vram_userfault_link)) + list_add(&bo->vram_userfault_link, + &xe->mem_access.vram_userfault.list); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + } } else { ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); } - if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - goto out; - /* - * ttm_bo_vm_reserve() already has dma_resv_lock. 
- */ - if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) { - mutex_lock(&xe->mem_access.vram_userfault.lock); - if (list_empty(&bo->vram_userfault_link)) - list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list); - mutex_unlock(&xe->mem_access.vram_userfault.lock); - } - dma_resv_unlock(tbo->base.resv); out: if (needs_rpm) @@ -2269,6 +2324,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) /** * xe_bo_pin_external - pin an external BO * @bo: buffer object to be pinned + * @in_place: Pin in current placement, don't attempt to migrate. * * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD) * BO. Unique call compared to xe_bo_pin as this function has it own set of @@ -2276,7 +2332,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) * * Returns 0 for success, negative error code otherwise. */ -int xe_bo_pin_external(struct xe_bo *bo) +int xe_bo_pin_external(struct xe_bo *bo, bool in_place) { struct xe_device *xe = xe_bo_device(bo); int err; @@ -2285,9 +2341,11 @@ int xe_bo_pin_external(struct xe_bo *bo) xe_assert(xe, xe_bo_is_user(bo)); if (!xe_bo_is_pinned(bo)) { - err = xe_bo_validate(bo, NULL, false); - if (err) - return err; + if (!in_place) { + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + } spin_lock(&xe->pinned.lock); list_add_tail(&bo->pinned_link, &xe->pinned.late.external); @@ -2438,9 +2496,11 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) .no_wait_gpu = false, .gfp_retry_mayfail = true, }; - struct pin_cookie cookie; int ret; + if (xe_bo_is_pinned(bo)) + return 0; + if (vm) { lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); @@ -2449,10 +2509,10 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } - cookie = xe_vm_set_validating(vm, allow_res_evict); + xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); - xe_vm_clear_validating(vm, allow_res_evict, cookie); + xe_vm_clear_validating(vm, allow_res_evict); return ret; } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 02e8cde4c6b2..cfb1ec266a6d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -12,6 +12,7 @@ #include "xe_macros.h" #include "xe_vm_types.h" #include "xe_vm.h" +#include "xe_vram_types.h" #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ @@ -23,8 +24,9 @@ #define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1) /* -- */ #define XE_BO_FLAG_STOLEN BIT(4) +#define XE_BO_FLAG_VRAM(vram) (XE_BO_FLAG_VRAM0 << ((vram)->id)) #define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? 
\ - XE_BO_FLAG_VRAM0 << (tile)->id : \ + XE_BO_FLAG_VRAM((tile)->mem.vram) : \ XE_BO_FLAG_SYSTEM) #define XE_BO_FLAG_GGTT BIT(5) #define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6) @@ -198,7 +200,7 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) } } -int xe_bo_pin_external(struct xe_bo *bo); +int xe_bo_pin_external(struct xe_bo *bo, bool in_place); int xe_bo_pin(struct xe_bo *bo); void xe_bo_unpin_external(struct xe_bo *bo); void xe_bo_unpin(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ff560d82496f..314652afdca7 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -9,6 +9,7 @@ #include <linux/iosys-map.h> #include <drm/drm_gpusvm.h> +#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> @@ -60,6 +61,14 @@ struct xe_bo { */ struct list_head client_link; #endif + /** @attr: User controlled attributes for bo */ + struct { + /** + * @atomic_access: type of atomic access bo needs + * protected by bo dma-resv lock + */ + u32 atomic_access; + } attr; /** * @pxp_key_instance: PXP key instance this BO was created against. A * 0 in this variable indicates that the BO does not use PXP encryption. @@ -76,6 +85,9 @@ struct xe_bo { /** @ccs_cleared */ bool ccs_cleared; + /** @bb_ccs_rw: BB instructions of CCS read/write. Valid only for VF */ + struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; + /** * @cpu_caching: CPU caching mode. Currently only used for userspace * objects. Exceptions are system memory on DGFX, which is always diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index e9b46a2d0019..1025d3979b06 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -5,6 +5,7 @@ #include <linux/bitops.h> #include <linux/configfs.h> +#include <linux/cleanup.h> #include <linux/find.h> #include <linux/init.h> #include <linux/module.h> @@ -12,9 +13,9 @@ #include <linux/string.h> #include "xe_configfs.h" -#include "xe_module.h" - #include "xe_hw_engine_types.h" +#include "xe_module.h" +#include "xe_pci_types.h" /** * DOC: Xe Configfs @@ -23,23 +24,45 @@ * ========= * * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a - * configfs subsystem called ``'xe'`` that creates a directory in the mounted configfs directory - * The user can create devices under this directory and configure them as necessary - * See Documentation/filesystems/configfs.rst for more information about how configfs works. + * configfs subsystem called ``xe`` that creates a directory in the mounted + * configfs directory. The user can create devices under this directory and + * configure them as necessary. See Documentation/filesystems/configfs.rst for + * more information about how configfs works. * * Create devices - * =============== + * ============== * - * In order to create a device, the user has to create a directory inside ``'xe'``:: + * To create a device, the ``xe`` module should already be loaded, but some + * attributes can only be set before binding the device. 
It can be accomplished + * by blocking the driver autoprobe: * - * mkdir /sys/kernel/config/xe/0000:03:00.0/ + * # echo 0 > /sys/bus/pci/drivers_autoprobe + * # modprobe xe + * + * In order to create a device, the user has to create a directory inside ``xe``:: + * + * # mkdir /sys/kernel/config/xe/0000:03:00.0/ * * Every device created is populated by the driver with entries that can be * used to configure it:: * * /sys/kernel/config/xe/ - * .. 0000:03:00.0/ - * ... survivability_mode + * ├── 0000:00:02.0 + * │ └── ... + * ├── 0000:00:02.1 + * │ └── ... + * : + * └── 0000:03:00.0 + * ├── survivability_mode + * ├── engines_allowed + * └── enable_psmi + * + * After configuring the attributes as per next section, the device can be + * probed with:: + * + * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind + * # # or + * # echo 0000:03:00.0 > /sys/bus/pci/drivers_probe * * Configure Attributes * ==================== @@ -51,7 +74,8 @@ * effect when probing the device. Example to enable it:: * * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode - * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported) + * + * This attribute can only be set before binding to the device. * * Allowed engines: * ---------------- @@ -77,24 +101,52 @@ * available for migrations, but it's disabled. This is intended for debugging * purposes only. * + * This attribute can only be set before binding to the device. + * + * PSMI + * ---- + * + * Enable extra debugging capabilities to trace engine execution. Only useful + * during early platform enabling and requires additional hardware connected. + * Once it's enabled, additionals WAs are added and runtime configuration is + * done via debugfs. Example to enable it:: + * + * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/enable_psmi + * + * This attribute can only be set before binding to the device. 
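A minimal sketch (not part of this patch) of how a probe-time consumer might act on the enable_psmi value configured above, using the xe_configfs_get_psmi_enabled() accessor introduced later in this change; the example_apply_psmi_config() helper and its placement are hypothetical:

static void example_apply_psmi_config(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);

	/* Value written to configfs before bind; defaults to false */
	if (xe_configfs_get_psmi_enabled(pdev))
		drm_dbg(&xe->drm, "PSMI capture requested via configfs\n");
}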
+ * * Remove devices * ============== * * The created device directories can be removed using ``rmdir``:: * - * rmdir /sys/kernel/config/xe/0000:03:00.0/ + * # rmdir /sys/kernel/config/xe/0000:03:00.0/ */ -struct xe_config_device { +struct xe_config_group_device { struct config_group group; - bool survivability_mode; - u64 engines_allowed; + struct xe_config_device { + u64 engines_allowed; + bool survivability_mode; + bool enable_psmi; + } config; /* protects attributes */ struct mutex lock; }; +static const struct xe_config_device device_defaults = { + .engines_allowed = U64_MAX, + .survivability_mode = false, + .enable_psmi = false, +}; + +static void set_device_defaults(struct xe_config_device *config) +{ + *config = device_defaults; +} + struct engine_info { const char *cls; u64 mask; @@ -113,9 +165,40 @@ static const struct engine_info engine_info[] = { { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK }, }; +static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item) +{ + return container_of(to_config_group(item), struct xe_config_group_device, group); +} + static struct xe_config_device *to_xe_config_device(struct config_item *item) { - return container_of(to_config_group(item), struct xe_config_device, group); + return &to_xe_config_group_device(item)->config; +} + +static bool is_bound(struct xe_config_group_device *dev) +{ + unsigned int domain, bus, slot, function; + struct pci_dev *pdev; + const char *name; + bool ret; + + lockdep_assert_held(&dev->lock); + + name = dev->group.cg_item.ci_name; + if (sscanf(name, "%x:%x:%x.%x", &domain, &bus, &slot, &function) != 4) + return false; + + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + if (!pdev) + return false; + + ret = pci_get_drvdata(pdev); + pci_dev_put(pdev); + + if (ret) + pci_dbg(pdev, "Already bound to driver\n"); + + return ret; } static ssize_t survivability_mode_show(struct config_item *item, char *page) @@ -127,7 +210,7 @@ static ssize_t survivability_mode_show(struct config_item *item, char *page) static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len) { - struct xe_config_device *dev = to_xe_config_device(item); + struct xe_config_group_device *dev = to_xe_config_group_device(item); bool survivability_mode; int ret; @@ -135,9 +218,11 @@ static ssize_t survivability_mode_store(struct config_item *item, const char *pa if (ret) return ret; - mutex_lock(&dev->lock); - dev->survivability_mode = survivability_mode; - mutex_unlock(&dev->lock); + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.survivability_mode = survivability_mode; return len; } @@ -199,7 +284,7 @@ static bool lookup_engine_mask(const char *pattern, u64 *mask) static ssize_t engines_allowed_store(struct config_item *item, const char *page, size_t len) { - struct xe_config_device *dev = to_xe_config_device(item); + struct xe_config_group_device *dev = to_xe_config_group_device(item); size_t patternlen, p; u64 mask, val = 0; @@ -219,25 +304,55 @@ static ssize_t engines_allowed_store(struct config_item *item, const char *page, val |= mask; } - mutex_lock(&dev->lock); - dev->engines_allowed = val; - mutex_unlock(&dev->lock); + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.engines_allowed = val; return len; } -CONFIGFS_ATTR(, survivability_mode); +static ssize_t enable_psmi_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + + return 
sprintf(page, "%d\n", dev->enable_psmi); +} + +static ssize_t enable_psmi_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + bool val; + int ret; + + ret = kstrtobool(page, &val); + if (ret) + return ret; + + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.enable_psmi = val; + + return len; +} + +CONFIGFS_ATTR(, enable_psmi); CONFIGFS_ATTR(, engines_allowed); +CONFIGFS_ATTR(, survivability_mode); static struct configfs_attribute *xe_config_device_attrs[] = { - &attr_survivability_mode, + &attr_enable_psmi, &attr_engines_allowed, + &attr_survivability_mode, NULL, }; static void xe_config_device_release(struct config_item *item) { - struct xe_config_device *dev = to_xe_config_device(item); + struct xe_config_group_device *dev = to_xe_config_group_device(item); mutex_destroy(&dev->lock); kfree(dev); @@ -253,29 +368,81 @@ static const struct config_item_type xe_config_device_type = { .ct_owner = THIS_MODULE, }; +static const struct xe_device_desc *xe_match_desc(struct pci_dev *pdev) +{ + struct device_driver *driver = driver_find("xe", &pci_bus_type); + struct pci_driver *drv = to_pci_driver(driver); + const struct pci_device_id *ids = drv ? drv->id_table : NULL; + const struct pci_device_id *found = pci_match_id(ids, pdev); + + return found ? (const void *)found->driver_data : NULL; +} + +static struct pci_dev *get_physfn_instead(struct pci_dev *virtfn) +{ + struct pci_dev *physfn = pci_physfn(virtfn); + + pci_dev_get(physfn); + pci_dev_put(virtfn); + return physfn; +} + static struct config_group *xe_config_make_device_group(struct config_group *group, const char *name) { unsigned int domain, bus, slot, function; - struct xe_config_device *dev; + struct xe_config_group_device *dev; + const struct xe_device_desc *match; struct pci_dev *pdev; + char canonical[16]; + int vfnumber = 0; int ret; - ret = sscanf(name, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function); + ret = sscanf(name, "%x:%x:%x.%x", &domain, &bus, &slot, &function); if (ret != 4) return ERR_PTR(-EINVAL); + ret = scnprintf(canonical, sizeof(canonical), "%04x:%02x:%02x.%d", domain, bus, + PCI_SLOT(PCI_DEVFN(slot, function)), + PCI_FUNC(PCI_DEVFN(slot, function))); + if (ret != 12 || strcmp(name, canonical)) + return ERR_PTR(-EINVAL); + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + if (!pdev && function) + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, 0)); + if (!pdev && slot) + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(0, 0)); if (!pdev) return ERR_PTR(-ENODEV); + + if (PCI_DEVFN(slot, function) != pdev->devfn) { + pdev = get_physfn_instead(pdev); + vfnumber = PCI_DEVFN(slot, function) - pdev->devfn; + if (!dev_is_pf(&pdev->dev) || vfnumber > pci_sriov_get_totalvfs(pdev)) { + pci_dev_put(pdev); + return ERR_PTR(-ENODEV); + } + } + + match = xe_match_desc(pdev); + if (match && vfnumber && !match->has_sriov) { + pci_info(pdev, "xe driver does not support VFs on this device\n"); + match = NULL; + } else if (!match) { + pci_info(pdev, "xe driver does not support configuration of this device\n"); + } + pci_dev_put(pdev); + if (!match) + return ERR_PTR(-ENOENT); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); - /* Default values */ - dev->engines_allowed = U64_MAX; + set_device_defaults(&dev->config); config_group_init_type_name(&dev->group, name, &xe_config_device_type); @@ -302,102 +469,145 @@ static struct 
configfs_subsystem xe_configfs = { }, }; -static struct xe_config_device *configfs_find_group(struct pci_dev *pdev) +static struct xe_config_group_device *find_xe_config_group_device(struct pci_dev *pdev) { struct config_item *item; - char name[64]; - - snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus), - pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); mutex_lock(&xe_configfs.su_mutex); - item = config_group_find_item(&xe_configfs.su_group, name); + item = config_group_find_item(&xe_configfs.su_group, pci_name(pdev)); mutex_unlock(&xe_configfs.su_mutex); if (!item) return NULL; - return to_xe_config_device(item); + return to_xe_config_group_device(item); +} + +static void dump_custom_dev_config(struct pci_dev *pdev, + struct xe_config_group_device *dev) +{ +#define PRI_CUSTOM_ATTR(fmt_, attr_) do { \ + if (dev->config.attr_ != device_defaults.attr_) \ + pci_info(pdev, "configfs: " __stringify(attr_) " = " fmt_ "\n", \ + dev->config.attr_); \ + } while (0) + + PRI_CUSTOM_ATTR("%llx", engines_allowed); + PRI_CUSTOM_ATTR("%d", enable_psmi); + PRI_CUSTOM_ATTR("%d", survivability_mode); + +#undef PRI_CUSTOM_ATTR +} + +/** + * xe_configfs_check_device() - Test if device was configured by configfs + * @pdev: the &pci_dev device to test + * + * Try to find the configfs group that belongs to the specified pci device + * and print a diagnostic message if different than the default value. + */ +void xe_configfs_check_device(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + + if (!dev) + return; + + /* memcmp here is safe as both are zero-initialized */ + if (memcmp(&dev->config, &device_defaults, sizeof(dev->config))) { + pci_info(pdev, "Found custom settings in configfs\n"); + dump_custom_dev_config(pdev, dev); + } + + config_group_put(&dev->group); } /** * xe_configfs_get_survivability_mode - get configfs survivability mode attribute * @pdev: pci device * - * find the configfs group that belongs to the pci device and return - * the survivability mode attribute - * - * Return: survivability mode if config group is found, false otherwise + * Return: survivability_mode attribute in configfs */ bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { - struct xe_config_device *dev = configfs_find_group(pdev); + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); bool mode; if (!dev) - return false; + return device_defaults.survivability_mode; - mode = dev->survivability_mode; - config_item_put(&dev->group.cg_item); + mode = dev->config.survivability_mode; + config_group_put(&dev->group); return mode; } /** - * xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute + * xe_configfs_clear_survivability_mode - clear configfs survivability mode * @pdev: pci device - * - * find the configfs group that belongs to the pci device and clear survivability - * mode attribute */ void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { - struct xe_config_device *dev = configfs_find_group(pdev); + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); if (!dev) return; - mutex_lock(&dev->lock); - dev->survivability_mode = 0; - mutex_unlock(&dev->lock); + guard(mutex)(&dev->lock); + dev->config.survivability_mode = 0; - config_item_put(&dev->group.cg_item); + config_group_put(&dev->group); } /** * xe_configfs_get_engines_allowed - get engine allowed mask from configfs * @pdev: pci device * - * Find the configfs group that belongs to the pci 
device and return - * the mask of engines allowed to be used. - * - * Return: engine mask with allowed engines + * Return: engine mask with allowed engines set in configfs */ u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { - struct xe_config_device *dev = configfs_find_group(pdev); + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); u64 engines_allowed; if (!dev) - return U64_MAX; + return device_defaults.engines_allowed; - engines_allowed = dev->engines_allowed; - config_item_put(&dev->group.cg_item); + engines_allowed = dev->config.engines_allowed; + config_group_put(&dev->group); return engines_allowed; } +/** + * xe_configfs_get_psmi_enabled - get configfs enable_psmi setting + * @pdev: pci device + * + * Return: enable_psmi setting in configfs + */ +bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + bool ret; + + if (!dev) + return false; + + ret = dev->config.enable_psmi; + config_item_put(&dev->group.cg_item); + + return ret; +} + int __init xe_configfs_init(void) { - struct config_group *root = &xe_configfs.su_group; int ret; - config_group_init(root); + config_group_init(&xe_configfs.su_group); mutex_init(&xe_configfs.su_mutex); ret = configfs_register_subsystem(&xe_configfs); if (ret) { - pr_err("Error %d while registering %s subsystem\n", - ret, root->cg_item.ci_namebuf); + mutex_destroy(&xe_configfs.su_mutex); return ret; } @@ -407,5 +617,5 @@ int __init xe_configfs_init(void) void __exit xe_configfs_exit(void) { configfs_unregister_subsystem(&xe_configfs); + mutex_destroy(&xe_configfs.su_mutex); } - diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index fb8764008089..58c8c3164000 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -13,15 +13,19 @@ struct pci_dev; #if IS_ENABLED(CONFIG_CONFIGFS_FS) int xe_configfs_init(void); void xe_configfs_exit(void); +void xe_configfs_check_device(struct pci_dev *pdev); bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); +bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev); #else static inline int xe_configfs_init(void) { return 0; } static inline void xe_configfs_exit(void) { } +static inline void xe_configfs_check_device(struct pci_dev *pdev) { } static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { } static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } +static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 26e9d146ccbf..8d6df6bd885e 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -11,18 +11,22 @@ #include <drm/drm_debugfs.h> +#include "regs/xe_pmt.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt_debugfs.h" #include "xe_gt_printk.h" #include "xe_guc_ads.h" +#include "xe_mmio.h" #include "xe_pm.h" +#include "xe_psmi.h" #include "xe_pxp_debugfs.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" #include "xe_step.h" #include "xe_wa.h" +#include "xe_vsec.h" #ifdef CONFIG_DRM_XE_DEBUG #include "xe_bo_evict.h" @@ -31,6 +35,24 @@ #endif 
DECLARE_FAULT_ATTR(gt_reset_failure); +DECLARE_FAULT_ATTR(inject_csc_hw_error); + +static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio, + u32 offset, char *name, struct drm_printer *p) +{ + u64 residency = 0; + int ret; + + ret = xe_pmt_telem_read(to_pci_dev(xe->drm.dev), + xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), + &residency, offset, sizeof(residency)); + if (ret != sizeof(residency)) { + drm_warn(&xe->drm, "%s counter failed to read, ret %d\n", name, ret); + return; + } + + drm_printf(p, "%s : %llu\n", name, residency); +} static struct xe_device *node_to_xe(struct drm_info_node *node) { @@ -102,12 +124,72 @@ static int workaround_info(struct seq_file *m, void *data) return 0; } +static int dgfx_pkg_residencies_show(struct seq_file *m, void *data) +{ + struct xe_device *xe; + struct xe_mmio *mmio; + struct drm_printer p; + + xe = node_to_xe(m->private); + p = drm_seq_file_printer(m); + xe_pm_runtime_get(xe); + mmio = xe_root_tile_mmio(xe); + struct { + u32 offset; + char *name; + } residencies[] = { + {BMG_G2_RESIDENCY_OFFSET, "Package G2"}, + {BMG_G6_RESIDENCY_OFFSET, "Package G6"}, + {BMG_G8_RESIDENCY_OFFSET, "Package G8"}, + {BMG_G10_RESIDENCY_OFFSET, "Package G10"}, + {BMG_MODS_RESIDENCY_OFFSET, "Package ModS"} + }; + + for (int i = 0; i < ARRAY_SIZE(residencies); i++) + read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p); + + xe_pm_runtime_put(xe); + return 0; +} + +static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data) +{ + struct xe_device *xe; + struct xe_mmio *mmio; + struct drm_printer p; + + xe = node_to_xe(m->private); + p = drm_seq_file_printer(m); + xe_pm_runtime_get(xe); + mmio = xe_root_tile_mmio(xe); + + struct { + u32 offset; + char *name; + } residencies[] = { + {BMG_PCIE_LINK_L0_RESIDENCY_OFFSET, "PCIE LINK L0 RESIDENCY"}, + {BMG_PCIE_LINK_L1_RESIDENCY_OFFSET, "PCIE LINK L1 RESIDENCY"}, + {BMG_PCIE_LINK_L1_2_RESIDENCY_OFFSET, "PCIE LINK L1.2 RESIDENCY"} + }; + + for (int i = 0; i < ARRAY_SIZE(residencies); i++) + read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p); + + xe_pm_runtime_put(xe); + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"info", info, 0}, { .name = "sriov_info", .show = sriov_info, }, { .name = "workarounds", .show = workaround_info, }, }; +static const struct drm_info_list debugfs_residencies[] = { + { .name = "dgfx_pkg_residencies", .show = dgfx_pkg_residencies_show, }, + { .name = "dgfx_pcie_link_residencies", .show = dgfx_pcie_link_residencies_show, }, +}; + static int forcewake_open(struct inode *inode, struct file *file) { struct xe_device *xe = inode->i_private; @@ -247,20 +329,47 @@ static const struct file_operations atomic_svm_timeslice_ms_fops = { .write = atomic_svm_timeslice_ms_set, }; +static void create_tile_debugfs(struct xe_tile *tile, struct dentry *root) +{ + char name[8]; + + snprintf(name, sizeof(name), "tile%u", tile->id); + tile->debugfs = debugfs_create_dir(name, root); + if (IS_ERR(tile->debugfs)) + return; + + /* + * Store the xe_tile pointer as private data of the tile/ directory + * node so other tile specific attributes under that directory may + * refer to it by looking at its parent node private data. 
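A minimal sketch (not part of this patch) of the lookup that the comment above describes, assuming a tile attribute is registered via drm_debugfs_create_files() with tile->debugfs as its root; the node_to_tile() helper name is hypothetical:

static struct xe_tile *node_to_tile(struct drm_info_node *node)
{
	/* create_tile_debugfs() stored the xe_tile pointer in the directory inode */
	return node->dent->d_parent->d_inode->i_private;
}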
+ */ + tile->debugfs->d_inode->i_private = tile; +} + void xe_debugfs_register(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; struct drm_minor *minor = xe->drm.primary; struct dentry *root = minor->debugfs_root; struct ttm_resource_manager *man; + struct xe_tile *tile; struct xe_gt *gt; u32 mem_type; + u8 tile_id; u8 id; drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), root, minor); + if (xe->info.platform == XE_BATTLEMAGE) { + drm_debugfs_create_files(debugfs_residencies, + ARRAY_SIZE(debugfs_residencies), + root, minor); + fault_create_debugfs_attr("inject_csc_hw_error", root, + &inject_csc_hw_error); + } + debugfs_create_file("forcewake_all", 0400, root, xe, &forcewake_all_fops); @@ -288,11 +397,16 @@ void xe_debugfs_register(struct xe_device *xe) if (man) ttm_resource_manager_create_debugfs(man, root, "stolen_mm"); + for_each_tile(tile, xe, tile_id) + create_tile_debugfs(tile, root); + for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); xe_pxp_debugfs_register(xe->pxp); + xe_psmi_debugfs_register(xe); + fault_create_debugfs_attr("fail_gt_reset", root, >_reset_failure); if (IS_SRIOV_PF(xe)) diff --git a/drivers/gpu/drm/xe/xe_dep_job_types.h b/drivers/gpu/drm/xe/xe_dep_job_types.h new file mode 100644 index 000000000000..c6a484f24c8c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_job_types.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_DEP_JOB_TYPES_H_ +#define _XE_DEP_JOB_TYPES_H_ + +#include <drm/gpu_scheduler.h> + +struct xe_dep_job; + +/** struct xe_dep_job_ops - Generic Xe dependency job operations */ +struct xe_dep_job_ops { + /** @run_job: Run generic Xe dependency job */ + struct dma_fence *(*run_job)(struct xe_dep_job *job); + /** @free_job: Free generic Xe dependency job */ + void (*free_job)(struct xe_dep_job *job); +}; + +/** struct xe_dep_job - Generic dependency Xe job */ +struct xe_dep_job { + /** @drm: base DRM scheduler job */ + struct drm_sched_job drm; + /** @ops: dependency job operations */ + const struct xe_dep_job_ops *ops; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_dep_scheduler.c b/drivers/gpu/drm/xe/xe_dep_scheduler.c new file mode 100644 index 000000000000..9bd3bfd2e526 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_scheduler.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/slab.h> + +#include <drm/gpu_scheduler.h> + +#include "xe_dep_job_types.h" +#include "xe_dep_scheduler.h" +#include "xe_device_types.h" + +/** + * DOC: Xe Dependency Scheduler + * + * The Xe dependency scheduler is a simple wrapper built around the DRM + * scheduler to execute jobs once their dependencies are resolved (i.e., all + * input fences specified as dependencies are signaled). The jobs that are + * executed contain virtual functions to run (execute) and free the job, + * allowing a single dependency scheduler to handle jobs performing different + * operations. + * + * Example use cases include deferred resource freeing, TLB invalidations after + * bind jobs, etc. 
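A minimal usage sketch (not part of this patch) for the dependency scheduler described above: a caller embeds struct xe_dep_job, supplies run_job/free_job callbacks, and submits against the entity returned by xe_dep_scheduler_entity(); all example_* names are hypothetical:

struct example_job {
	struct xe_dep_job dep;	/* generic dependency job must be embedded */
	/* per-job payload goes here */
};

static struct dma_fence *example_run_job(struct xe_dep_job *job)
{
	/* perform the deferred work; return a fence to wait on, or NULL if done */
	return NULL;
}

static void example_free_job(struct xe_dep_job *job)
{
	kfree(container_of(job, struct example_job, dep));
}

static const struct xe_dep_job_ops example_ops = {
	.run_job = example_run_job,
	.free_job = example_free_job,
};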
+ */ + +/** struct xe_dep_scheduler - Generic Xe dependency scheduler */ +struct xe_dep_scheduler { + /** @sched: DRM GPU scheduler */ + struct drm_gpu_scheduler sched; + /** @entity: DRM scheduler entity */ + struct drm_sched_entity entity; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; +}; + +static struct dma_fence *xe_dep_scheduler_run_job(struct drm_sched_job *drm_job) +{ + struct xe_dep_job *dep_job = + container_of(drm_job, typeof(*dep_job), drm); + + return dep_job->ops->run_job(dep_job); +} + +static void xe_dep_scheduler_free_job(struct drm_sched_job *drm_job) +{ + struct xe_dep_job *dep_job = + container_of(drm_job, typeof(*dep_job), drm); + + dep_job->ops->free_job(dep_job); +} + +static const struct drm_sched_backend_ops sched_ops = { + .run_job = xe_dep_scheduler_run_job, + .free_job = xe_dep_scheduler_free_job, +}; + +/** + * xe_dep_scheduler_create() - Generic Xe dependency scheduler create + * @xe: Xe device + * @submit_wq: Submit workqueue struct (can be NULL) + * @name: Name of dependency scheduler + * @job_limit: Max dependency jobs that can be scheduled + * + * Create a generic Xe dependency scheduler and initialize internal DRM + * scheduler objects. + * + * Return: Generic Xe dependency scheduler object on success, ERR_PTR failure + */ +struct xe_dep_scheduler * +xe_dep_scheduler_create(struct xe_device *xe, + struct workqueue_struct *submit_wq, + const char *name, u32 job_limit) +{ + struct xe_dep_scheduler *dep_scheduler; + struct drm_gpu_scheduler *sched; + const struct drm_sched_init_args args = { + .ops = &sched_ops, + .submit_wq = submit_wq, + .num_rqs = 1, + .credit_limit = job_limit, + .timeout = MAX_SCHEDULE_TIMEOUT, + .name = name, + .dev = xe->drm.dev, + }; + int err; + + dep_scheduler = kzalloc(sizeof(*dep_scheduler), GFP_KERNEL); + if (!dep_scheduler) + return ERR_PTR(-ENOMEM); + + err = drm_sched_init(&dep_scheduler->sched, &args); + if (err) + goto err_free; + + sched = &dep_scheduler->sched; + err = drm_sched_entity_init(&dep_scheduler->entity, 0, &sched, 1, NULL); + if (err) + goto err_sched; + + init_rcu_head(&dep_scheduler->rcu); + + return dep_scheduler; + +err_sched: + drm_sched_fini(&dep_scheduler->sched); +err_free: + kfree(dep_scheduler); + + return ERR_PTR(err); +} + +/** + * xe_dep_scheduler_fini() - Generic Xe dependency scheduler finalize + * @dep_scheduler: Generic Xe dependency scheduler object + * + * Finalize internal DRM scheduler objects and free generic Xe dependency + * scheduler object + */ +void xe_dep_scheduler_fini(struct xe_dep_scheduler *dep_scheduler) +{ + drm_sched_entity_fini(&dep_scheduler->entity); + drm_sched_fini(&dep_scheduler->sched); + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). 
+ */ + kfree_rcu(dep_scheduler, rcu); +} + +/** + * xe_dep_scheduler_entity() - Retrieve a generic Xe dependency scheduler + * DRM scheduler entity + * @dep_scheduler: Generic Xe dependency scheduler object + * + * Return: The generic Xe dependency scheduler's DRM scheduler entity + */ +struct drm_sched_entity * +xe_dep_scheduler_entity(struct xe_dep_scheduler *dep_scheduler) +{ + return &dep_scheduler->entity; +} diff --git a/drivers/gpu/drm/xe/xe_dep_scheduler.h b/drivers/gpu/drm/xe/xe_dep_scheduler.h new file mode 100644 index 000000000000..853961eec64b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_scheduler.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/types.h> + +struct drm_sched_entity; +struct workqueue_struct; +struct xe_dep_scheduler; +struct xe_device; + +struct xe_dep_scheduler * +xe_dep_scheduler_create(struct xe_device *xe, + struct workqueue_struct *submit_wq, + const char *name, u32 job_limit); + +void xe_dep_scheduler_fini(struct xe_dep_scheduler *dep_scheduler); + +struct drm_sched_entity * +xe_dep_scheduler_entity(struct xe_dep_scheduler *dep_scheduler); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6ece4defa9df..9e4773a17ef8 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -54,6 +54,7 @@ #include "xe_pcode.h" #include "xe_pm.h" #include "xe_pmu.h" +#include "xe_psmi.h" #include "xe_pxp.h" #include "xe_query.h" #include "xe_shrinker.h" @@ -63,7 +64,9 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" #include "xe_vm.h" +#include "xe_vm_madvise.h" #include "xe_vram.h" +#include "xe_vram_types.h" #include "xe_vsec.h" #include "xe_wait_user_fence.h" #include "xe_wa.h" @@ -200,6 +203,9 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_QUERY_MEM_RANGE_ATTRS, xe_vm_query_vmas_attrs_ioctl, + DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -688,6 +694,21 @@ static void sriov_update_device_info(struct xe_device *xe) } } +static int xe_device_vram_alloc(struct xe_device *xe) +{ + struct xe_vram_region *vram; + + if (!IS_DGFX(xe)) + return 0; + + vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return -ENOMEM; + + xe->mem.vram = vram; + return 0; +} + /** * xe_device_probe_early: Device early probe * @xe: xe device instance @@ -722,7 +743,7 @@ int xe_device_probe_early(struct xe_device *xe) * possible, but still return the previous error for error * propagation */ - err = xe_survivability_mode_enable(xe); + err = xe_survivability_mode_boot_enable(xe); if (err) return err; @@ -735,6 +756,10 @@ int xe_device_probe_early(struct xe_device *xe) xe->wedged.mode = xe_modparam.wedged_mode; + err = xe_device_vram_alloc(xe); + if (err) + return err; + return 0; } ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ @@ -863,7 +888,7 @@ int xe_device_probe(struct xe_device *xe) } if (xe->tiles->media_gt && - XE_WA(xe->tiles->media_gt, 15015404425_disable)) + XE_GT_WA(xe->tiles->media_gt, 15015404425_disable)) XE_DEVICE_WA_DISABLE(xe, 15015404425); err = xe_devcoredump_init(xe); @@ -888,6 +913,10 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; + err = 
xe_psmi_init(xe); + if (err) + return err; + err = drm_dev_register(&xe->drm, 0); if (err) return err; @@ -921,6 +950,10 @@ int xe_device_probe(struct xe_device *xe) xe_vsec_init(xe); + err = xe_sriov_late_init(xe); + if (err) + goto err_unregister_display; + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_unregister_display: @@ -1019,7 +1052,7 @@ void xe_device_l2_flush(struct xe_device *xe) gt = xe_root_mmio_gt(xe); - if (!XE_WA(gt, 16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); @@ -1063,7 +1096,7 @@ void xe_device_td_flush(struct xe_device *xe) return; root_gt = xe_root_mmio_gt(xe); - if (XE_WA(root_gt, 16023588340)) { + if (XE_GT_WA(root_gt, 16023588340)) { /* A transient flush is not sufficient: flush the L2 */ xe_device_l2_flush(xe); } else { @@ -1134,11 +1167,63 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg) } /** + * DOC: Xe Device Wedging + * + * Xe driver uses drm device wedged uevent as documented in Documentation/gpu/drm-uapi.rst. + * When device is in wedged state, every IOCTL will be blocked and GT cannot be + * used. Certain critical errors like gt reset failure, firmware failures can cause + * the device to be wedged. The default recovery method for a wedged state + * is rebind/bus-reset. + * + * Another recovery method is vendor-specific. Below are the cases that send + * ``WEDGED=vendor-specific`` recovery method in drm device wedged uevent. + * + * Case: Firmware Flash + * -------------------- + * + * Identification Hint + * +++++++++++++++++++ + * + * ``WEDGED=vendor-specific`` drm device wedged uevent with + * :ref:`Runtime Survivability mode <xe-survivability-mode>` is used to notify + * admin/userspace consumer about the need for a firmware flash. + * + * Recovery Procedure + * ++++++++++++++++++ + * + * Once ``WEDGED=vendor-specific`` drm device wedged uevent is received, follow + * the below steps + * + * - Check Runtime Survivability mode sysfs. + * If enabled, firmware flash is required to recover the device. + * + * /sys/bus/pci/devices/<device>/survivability_mode + * + * - Admin/userpsace consumer can use firmware flashing tools like fwupd to flash + * firmware and restore device to normal operation. + */ + +/** + * xe_device_set_wedged_method - Set wedged recovery method + * @xe: xe device instance + * @method: recovery method to set + * + * Set wedged recovery method to be sent in drm wedged uevent. + */ +void xe_device_set_wedged_method(struct xe_device *xe, unsigned long method) +{ + xe->wedged.method = method; +} + +/** * xe_device_declare_wedged - Declare device wedged * @xe: xe device instance * - * This is a final state that can only be cleared with a module - * re-probe (unbind + bind). + * This is a final state that can only be cleared with the recovery method + * specified in the drm wedged uevent. The method can be set using + * xe_device_set_wedged_method before declaring the device as wedged. If no method + * is set, reprobe (unbind/re-bind) will be sent by default. + * * In this state every IOCTL will be blocked so the GT cannot be used. * In general it will be called upon any critical error such as gt reset * failure or guc loading failure. Userspace will be notified of this state @@ -1172,13 +1257,18 @@ void xe_device_declare_wedged(struct xe_device *xe) "IOCTLs and executions are blocked. 
Only a rebind may clear the failure\n" "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", dev_name(xe->drm.dev)); - - /* Notify userspace of wedged device */ - drm_dev_wedged_event(&xe->drm, - DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET, - NULL); } for_each_gt(gt, xe, id) xe_gt_declare_wedged(gt); + + if (xe_device_wedged(xe)) { + /* If no wedge recovery method is set, use default */ + if (!xe->wedged.method) + xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_REBIND | + DRM_WEDGE_RECOVERY_BUS_RESET); + + /* Notify userspace of wedged device */ + drm_dev_wedged_event(&xe->drm, xe->wedged.method, NULL); + } } diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index bc802e066a7d..32cc6323b7f6 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -187,6 +187,7 @@ static inline bool xe_device_wedged(struct xe_device *xe) return atomic_read(&xe->wedged.flag); } +void xe_device_set_wedged_method(struct xe_device *xe, unsigned long method); void xe_device_declare_wedged(struct xe_device *xe); struct xe_file *xe_file_get(struct xe_file *xef); diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index bd9015761aa0..6ee422594b56 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -76,7 +76,7 @@ lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, c { struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); struct xe_tile *root = xe_device_get_root_tile(xe); - u32 cap, ver_low = FAN_TABLE, ver_high = FAN_TABLE; + u32 cap = 0, ver_low = FAN_TABLE, ver_high = FAN_TABLE; u16 major = 0, minor = 0, hotfix = 0, build = 0; int ret; @@ -115,7 +115,7 @@ lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *a { struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); struct xe_tile *root = xe_device_get_root_tile(xe); - u32 cap, ver_low = VR_CONFIG, ver_high = VR_CONFIG; + u32 cap = 0, ver_low = VR_CONFIG, ver_high = VR_CONFIG; u16 major = 0, minor = 0, hotfix = 0, build = 0; int ret; @@ -153,7 +153,7 @@ static int late_bind_create_files(struct device *dev) { struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); struct xe_tile *root = xe_device_get_root_tile(xe); - u32 cap; + u32 cap = 0; int ret; xe_pm_runtime_get(xe); @@ -186,7 +186,7 @@ static void late_bind_remove_files(struct device *dev) { struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); struct xe_tile *root = xe_device_get_root_tile(xe); - u32 cap; + u32 cap = 0; int ret; xe_pm_runtime_get(xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index d4d2c6854790..1e780f8a2a8c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -10,7 +10,6 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_device.h> #include "xe_devcoredump_types.h" @@ -24,9 +23,9 @@ #include "xe_sriov_pf_types.h" #include "xe_sriov_types.h" #include "xe_sriov_vf_types.h" +#include "xe_sriov_vf_ccs_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" -#include "xe_ttm_vram_mgr_types.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define TEST_VM_OPS_ERROR @@ -39,6 +38,7 @@ struct xe_ggtt; struct xe_i2c; struct xe_pat_ops; struct xe_pxp; +struct xe_vram_region; #define XE_BO_INVALID_OFFSET LONG_MAX @@ -72,61 +72,6 @@ struct xe_pxp; struct xe_tile * : (tile__)->xe) /** - * 
struct xe_vram_region - memory region structure - * This is used to describe a memory region in xe - * device, such as HBM memory or CXL extension memory. - */ -struct xe_vram_region { - /** @io_start: IO start address of this VRAM instance */ - resource_size_t io_start; - /** - * @io_size: IO size of this VRAM instance - * - * This represents how much of this VRAM we can access - * via the CPU through the VRAM BAR. This can be smaller - * than @usable_size, in which case only part of VRAM is CPU - * accessible (typically the first 256M). This - * configuration is known as small-bar. - */ - resource_size_t io_size; - /** @dpa_base: This memory regions's DPA (device physical address) base */ - resource_size_t dpa_base; - /** - * @usable_size: usable size of VRAM - * - * Usable size of VRAM excluding reserved portions - * (e.g stolen mem) - */ - resource_size_t usable_size; - /** - * @actual_physical_size: Actual VRAM size - * - * Actual VRAM size including reserved portions - * (e.g stolen mem) - */ - resource_size_t actual_physical_size; - /** @mapping: pointer to VRAM mappable space */ - void __iomem *mapping; - /** @ttm: VRAM TTM manager */ - struct xe_ttm_vram_mgr ttm; -#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) - /** @pagemap: Used to remap device memory as ZONE_DEVICE */ - struct dev_pagemap pagemap; - /** - * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory - * pages of this tile. - */ - struct drm_pagemap dpagemap; - /** - * @hpa_base: base host physical address - * - * This is generated when remap device memory as ZONE_DEVICE - */ - resource_size_t hpa_base; -#endif -}; - -/** * struct xe_mmio - register mmio structure * * Represents an MMIO region that the CPU may use to access registers. A @@ -216,7 +161,7 @@ struct xe_tile { * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. */ - struct xe_vram_region vram; + struct xe_vram_region *vram; /** @mem.ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; @@ -238,12 +183,18 @@ struct xe_tile { struct { /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ struct xe_ggtt_node *ggtt_balloon[2]; + + /** @sriov.vf.ccs: CCS read and write contexts for VF. */ + struct xe_tile_vf_ccs ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; } vf; } sriov; /** @memirq: Memory Based Interrupts. */ struct xe_memirq memirq; + /** @csc_hw_error_work: worker to report CSC HW errors */ + struct work_struct csc_hw_error_work; + /** @pcode: tile's PCODE */ struct { /** @pcode.lock: protecting tile's PCODE mailbox data */ @@ -255,6 +206,9 @@ struct xe_tile { /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ struct kobject *sysfs; + + /** @debugfs: debugfs directory associated with this tile */ + struct dentry *debugfs; }; /** @@ -336,8 +290,8 @@ struct xe_device { u8 has_mbx_power_limits:1; /** @info.has_pxp: Device has PXP support */ u8 has_pxp:1; - /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */ - u8 has_range_tlb_invalidation:1; + /** @info.has_range_tlb_inval: Has range based TLB invalidations */ + u8 has_range_tlb_inval:1; /** @info.has_sriov: Supports SR-IOV */ u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ @@ -412,7 +366,7 @@ struct xe_device { /** @mem: memory info for device */ struct { /** @mem.vram: VRAM info for device */ - struct xe_vram_region vram; + struct xe_vram_region *vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; /** @mem.sys_mgr: system memory shrinker. 
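The kernel-doc being removed here spells out the small-BAR case: the CPU-visible window (io_size) can be smaller than usable_size, in which case only part of VRAM is CPU accessible. A standalone sketch of that distinction, with plain integers standing in for resource_size_t and a local struct standing in for the relocated definition:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-in; the driver's definition moves out of xe_device_types.h. */
    struct vram_region_sketch {
            uint64_t io_size;      /* CPU-visible window through the VRAM BAR */
            uint64_t usable_size;  /* usable VRAM, excluding reserved portions */
    };

    static bool is_small_bar(const struct vram_region_sketch *vram)
    {
            /* Small-BAR: only part of VRAM (typically the first 256M) is mappable. */
            return vram->io_size < vram->usable_size;
    }

    int main(void)
    {
            struct vram_region_sketch r = {
                    .io_size = 256ull << 20,     /* 256M BAR window */
                    .usable_size = 16ull << 30,  /* 16G of VRAM */
            };

            printf("small-bar: %s\n", is_small_bar(&r) ? "yes" : "no");
            return 0;
    }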
*/ @@ -476,7 +430,7 @@ struct xe_device { /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; - /** @unordered_wq: used to serialize unordered work, mostly display */ + /** @unordered_wq: used to serialize unordered work */ struct workqueue_struct *unordered_wq; /** @destroy_wq: used to serialize user destroy work, like queue */ @@ -553,6 +507,12 @@ struct xe_device { /** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */ struct notifier_block pm_notifier; + /** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */ + struct completion pm_block; + /** @rebind_resume_list: List of wq items to kick on resume. */ + struct list_head rebind_resume_list; + /** @rebind_resume_lock: Lock to protect the rebind_resume_list */ + struct mutex rebind_resume_lock; /** @pmt: Support the PMT driver callback interface */ struct { @@ -590,6 +550,8 @@ struct xe_device { atomic_t flag; /** @wedged.mode: Mode controlled by kernel parameter and debugfs */ int mode; + /** @wedged.method: Recovery method to be sent in the drm device wedged uevent */ + unsigned long method; } wedged; /** @bo_device: Struct to control async free of BOs */ @@ -625,6 +587,14 @@ struct xe_device { atomic64_t global_total_pages; #endif + /** @psmi: GPU debugging via additional validation HW */ + struct { + /** @psmi.capture_obj: PSMI buffer for VRAM */ + struct xe_bo *capture_obj[XE_MAX_TILES_PER_DEVICE + 1]; + /** @psmi.region_mask: Mask of valid memory regions */ + u8 region_mask; + } psmi; + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -658,7 +628,6 @@ struct xe_device { struct { unsigned int hpll_freq; unsigned int czclk_freq; - unsigned int fsb_freq, mem_freq, is_ddr3; }; #endif }; diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 346f857f3837..95d06bd65b0f 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -72,7 +72,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) return ret; } - ret = xe_bo_pin_external(bo); + ret = xe_bo_pin_external(bo, true); xe_assert(xe, !ret); return 0; @@ -191,10 +191,22 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) { struct xe_bo *bo = gem_to_xe_bo(obj); struct dma_buf *buf; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = true, + /* We opt to avoid OOM on system pages allocations */ + .gfp_retry_mayfail = true, + .allow_res_evict = false, + }; + int ret; if (bo->vm) return ERR_PTR(-EPERM); + ret = ttm_bo_setup_export(&bo->ttm, &ctx); + if (ret) + return ERR_PTR(ret); + buf = drm_gem_prime_export(obj, flags); if (!IS_ERR(buf)) buf->ops = &xe_dmabuf_ops; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index af7916315ac6..fdd514fec5ef 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -649,7 +649,7 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) return -ETIMEDOUT; } - if (XE_WA(gt, 22016596838)) + if (XE_GT_WA(gt, 22016596838)) xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); @@ -805,7 +805,7 @@ static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream) cancel_delayed_work_sync(&stream->buf_poll_work); - if (XE_WA(gt, 22016596838)) + if (XE_GT_WA(gt, 22016596838)) xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); diff --git a/drivers/gpu/drm/xe/xe_exec.c 
b/drivers/gpu/drm/xe/xe_exec.c index 44364c042ad7..374c831e691b 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -237,6 +237,15 @@ retry: goto err_unlock_list; } + /* + * It's OK to block interruptible here with the vm lock held, since + * on task freezing during suspend / hibernate, the call will + * return -ERESTARTSYS and the IOCTL will be rerun. + */ + err = wait_for_completion_interruptible(&xe->pm_block); + if (err) + goto err_unlock_list; + vm_exec.vm = &vm->gpuvm; vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; if (xe_vm_in_lr_mode(vm)) { diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 8991b4aed440..063c89d981e5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -12,6 +12,7 @@ #include <drm/drm_file.h> #include <uapi/drm/xe_drm.h> +#include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" @@ -39,6 +40,12 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue static void __xe_exec_queue_free(struct xe_exec_queue *q) { + int i; + + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) + if (q->tlb_inval[i].dep_scheduler) + xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler); + if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); if (q->vm) @@ -50,6 +57,39 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) kfree(q); } +static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_tile *tile = gt_to_tile(q->gt); + int i; + + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) { + struct xe_dep_scheduler *dep_scheduler; + struct xe_gt *gt; + struct workqueue_struct *wq; + + if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT) + gt = tile->primary_gt; + else + gt = tile->media_gt; + + if (!gt) + continue; + + wq = gt->tlb_inval.job_wq; + +#define MAX_TLB_INVAL_JOBS 16 /* Picking a reasonable value */ + dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name, + MAX_TLB_INVAL_JOBS); + if (IS_ERR(dep_scheduler)) + return PTR_ERR(dep_scheduler); + + q->tlb_inval[i].dep_scheduler = dep_scheduler; + } +#undef MAX_TLB_INVAL_JOBS + + return 0; +} + static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, @@ -94,6 +134,14 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, else q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) { + err = alloc_dep_schedulers(xe, q); + if (err) { + __xe_exec_queue_free(q); + return ERR_PTR(err); + } + } + if (vm) q->vm = xe_vm_get(vm); @@ -742,6 +790,21 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, } /** + * xe_exec_queue_lrc() - Get the LRC from exec queue. + * @q: The exec_queue. + * + * Retrieves the primary LRC for the exec queue. Note that this function + * returns only the first LRC instance, even when multiple parallel LRCs + * are configured. + * + * Return: Pointer to LRC on success, error on failure + */ +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) +{ + return q->lrc[0]; +} + +/** * xe_exec_queue_is_lr() - Whether an exec_queue is long-running * @q: The exec_queue * @@ -1028,3 +1091,51 @@ int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm) return err; } + +/** + * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references + * within all LRCs of a queue. 
+ * @q: the &xe_exec_queue struct instance containing target LRCs + * @scratch: scratch buffer to be used as temporary storage + * + * Returns: zero on success, negative error code on failure + */ +int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) +{ + int i; + int err = 0; + + for (i = 0; i < q->width; ++i) { + xe_lrc_update_memirq_regs_with_address(q->lrc[i], q->hwe, scratch); + xe_lrc_update_hwctx_regs_with_address(q->lrc[i]); + err = xe_lrc_setup_wa_bb_with_scratch(q->lrc[i], q->hwe, scratch); + if (err) + break; + } + + return err; +} + +/** + * xe_exec_queue_jobs_ring_restore - Re-emit ring commands of requests pending on given queue. + * @q: the &xe_exec_queue struct instance + */ +void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_job *job; + + /* + * This routine is used within VF migration recovery. This means + * using the lock here introduces a restriction: we cannot wait + * for any GFX HW response while the lock is taken. + */ + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + if (xe_sched_job_is_error(job)) + continue; + + q->ring_ops->emit_job(job); + } + spin_unlock(&sched->base.job_list_lock); +} diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 17bc50a7f05a..15ec852e7f7e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -90,4 +90,9 @@ int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm); void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); +int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); + +void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q); + +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q); #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index cc1cffb5c87f..ba443a497b38 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -87,6 +87,8 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4) /* flag to indicate low latency hint to guc */ #define EXEC_QUEUE_FLAG_LOW_LATENCY BIT(5) +/* for migration (kernel copy, clear, bind) jobs */ +#define EXEC_QUEUE_FLAG_MIGRATE BIT(6) /** * @flags: flags for this exec queue, should statically setup aside from ban @@ -132,6 +134,19 @@ struct xe_exec_queue { struct list_head link; } lr; +#define XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT 0 +#define XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT 1 +#define XE_EXEC_QUEUE_TLB_INVAL_COUNT (XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1) + + /** @tlb_inval: TLB invalidations exec queue state */ + struct { + /** + * @tlb_inval.dep_scheduler: The TLB invalidation + * dependency scheduler + */ + struct xe_dep_scheduler *dep_scheduler; + } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT]; + /** @pxp: PXP info tracking */ struct { /** @pxp.type: PXP session type used by this queue */ diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index 6581cb0f0e59..247e41c1c48d 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -123,11 +123,19 @@ static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) return 0; } +/* Avoid GNU vs POSIX basename() discrepancy, just use our own */ +static const char *xbasename(const char *s) +{ + const char *p = strrchr(s, '/'); + + return p ? 
p + 1 : s; +} + static int fn_to_prefix(const char *fn, char *prefix, size_t size) { size_t len; - fn = basename(fn); + fn = xbasename(fn); len = strlen(fn); if (len > size - 1) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 29d4d3f51da1..71c7690a92b3 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -23,13 +23,13 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_printk.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" #include "xe_res_cursor.h" #include "xe_sriov.h" #include "xe_tile_sriov_vf.h" +#include "xe_tlb_inval.h" #include "xe_wa.h" #include "xe_wopcm.h" @@ -106,10 +106,10 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) static void ggtt_update_access_counter(struct xe_ggtt *ggtt) { struct xe_tile *tile = ggtt->tile; - struct xe_gt *affected_gt = XE_WA(tile->primary_gt, 22019338487) ? + struct xe_gt *affected_gt = XE_GT_WA(tile->primary_gt, 22019338487) ? tile->primary_gt : tile->media_gt; struct xe_mmio *mmio = &affected_gt->mmio; - u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63; + u32 max_gtt_writes = XE_GT_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63; /* * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit * to wait for completion of prior GTT writes before letting this through. @@ -284,8 +284,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (GRAPHICS_VERx100(xe) >= 1270) ggtt->pt_ops = (ggtt->tile->media_gt && - XE_WA(ggtt->tile->media_gt, 22019338487)) || - XE_WA(ggtt->tile->primary_gt, 22019338487) ? + XE_GT_WA(ggtt->tile->media_gt, 22019338487)) || + XE_GT_WA(ggtt->tile->primary_gt, 22019338487) ? &xelpg_pt_wa_ops : &xelpg_pt_ops; else ggtt->pt_ops = &xelp_pt_ops; @@ -438,9 +438,8 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) if (!gt) return; - err = xe_gt_tlb_invalidation_ggtt(gt); - if (err) - drm_warn(>_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); + err = xe_tlb_inval_ggtt(>->tlb_inval); + xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err)); } static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index 869b43a4151d..455ccaf17314 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -101,6 +101,19 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } +/** + * xe_sched_submission_stop_async - Stop further runs of submission tasks on a scheduler. + * @sched: the &xe_gpu_scheduler struct instance + * + * This call disables further runs of scheduling work queue. It does not wait + * for any in-progress runs to finish, only makes sure no further runs happen + * afterwards. 
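The xbasename() helper added to xe_gen_wa_oob.c above side-steps the GNU vs POSIX basename() discrepancy by simply returning everything after the last '/'. A standalone demo of that behaviour (the example path is illustrative):

    #include <stdio.h>
    #include <string.h>

    static const char *xbasename(const char *s)
    {
            const char *p = strrchr(s, '/');

            return p ? p + 1 : s;
    }

    int main(void)
    {
            printf("%s\n", xbasename("drivers/gpu/drm/xe/xe_wa_oob.rules")); /* xe_wa_oob.rules */
            printf("%s\n", xbasename("Makefile"));                           /* Makefile */
            return 0;
    }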
+ */ +void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched) +{ + drm_sched_wqueue_stop(&sched->base); +} + void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) { drm_sched_resume_timeout(&sched->base, sched->base.timeout); diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 308061f0cf37..e548b2aed95a 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -21,6 +21,7 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); +void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched); void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 1d84bf2f2cef..f5ae28af60d4 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -266,7 +266,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) unsigned int fw_ref; int ret; - if (XE_WA(tile->primary_gt, 14018094691)) { + if (XE_GT_WA(tile->primary_gt, 14018094691)) { fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -281,7 +281,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(tile->primary_gt, 14018094691)) + if (XE_GT_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref); if (ret) @@ -593,7 +593,7 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP; /* WA only applies if the GSC is loaded */ - if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) + if (!XE_GT_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) return; xe_mmio_rmw32(>->mmio, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index c8eda36546d3..34505a6d93ed 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -37,10 +37,10 @@ #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" #include "xe_gt_sysfs.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_pc.h" +#include "xe_guc_submit.h" #include "xe_hw_fence.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_irq.h" @@ -57,6 +57,7 @@ #include "xe_sa.h" #include "xe_sched_job.h" #include "xe_sriov.h" +#include "xe_tlb_inval.h" #include "xe_tuning.h" #include "xe_uc.h" #include "xe_uc_fw.h" @@ -105,7 +106,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) unsigned int fw_ref; u32 reg; - if (!XE_WA(gt, 16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); @@ -127,7 +128,7 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) unsigned int fw_ref; u32 reg; - if (!XE_WA(gt, 16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; if (xe_gt_is_media_type(gt)) @@ -399,7 +400,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); - err = xe_wa_init(gt); + err = xe_wa_gt_init(gt); if (err) return err; @@ -407,12 +408,12 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_wa_process_oob(gt); + xe_wa_process_gt_oob(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(>->global_invl_lock); - err = xe_gt_tlb_invalidation_init_early(gt); + err = xe_gt_tlb_inval_init_early(gt); if (err) return err; @@ -564,11 +565,9 @@ static int 
gt_init_with_all_forcewake(struct xe_gt *gt) if (xe_gt_is_main_type(gt)) { struct xe_tile *tile = gt_to_tile(gt); - tile->migrate = xe_migrate_init(tile); - if (IS_ERR(tile->migrate)) { - err = PTR_ERR(tile->migrate); + err = xe_migrate_init(tile->migrate); + if (err) goto err_force_wake; - } } err = xe_uc_load_hw(>->uc); @@ -804,6 +803,11 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } +static int gt_wait_reset_unblock(struct xe_gt *gt) +{ + return xe_guc_wait_reset_unblock(>->uc.guc); +} + static int gt_reset(struct xe_gt *gt) { unsigned int fw_ref; @@ -818,6 +822,10 @@ static int gt_reset(struct xe_gt *gt) xe_gt_info(gt, "reset started\n"); + err = gt_wait_reset_unblock(gt); + if (!err) + xe_gt_warn(gt, "reset block failed to get lifted"); + xe_pm_runtime_get(gt_to_xe(gt)); if (xe_fault_inject_gt_reset()) { @@ -842,7 +850,7 @@ static int gt_reset(struct xe_gt *gt) xe_uc_stop(>->uc); - xe_gt_tlb_invalidation_reset(gt); + xe_tlb_inval_reset(>->tlb_inval); err = do_gt_reset(gt); if (err) @@ -958,7 +966,7 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || xe_uc_fw_is_loaded(>->uc.gsc.fw) || xe_uc_fw_is_in_error_state(>->uc.gsc.fw)) && - XE_WA(gt, 22019338487)) + XE_GT_WA(gt, 22019338487)) ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); return ret; @@ -1056,5 +1064,5 @@ void xe_gt_declare_wedged(struct xe_gt *gt) xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); xe_uc_declare_wedged(>->uc); - xe_gt_tlb_invalidation_reset(gt); + xe_tlb_inval_reset(>->tlb_inval); } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 848618acdca8..bf3a67b5951c 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -29,6 +29,7 @@ #include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" +#include "xe_sa.h" #include "xe_sriov.h" #include "xe_tuning.h" #include "xe_uc_debugfs.h" @@ -128,7 +129,34 @@ static int sa_info(struct xe_gt *gt, struct drm_printer *p) xe_pm_runtime_get(gt_to_xe(gt)); drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, - tile->mem.kernel_bb_pool->gpu_addr); + xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool)); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + +static int sa_info_vf_ccs(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_tile *tile = gt_to_tile(gt); + struct xe_sa_manager *bb_pool; + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + + if (!IS_VF_CCS_READY(gt_to_xe(gt))) + return 0; + + xe_pm_runtime_get(gt_to_xe(gt)); + + for_each_ccs_rw_ctx(ctx_id) { + bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool; + if (!bb_pool) + break; + + drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read"); + drm_printf(p, "-------------------------\n"); + drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); + drm_puts(p, "\n"); + } + xe_pm_runtime_put(gt_to_xe(gt)); return 0; @@ -303,6 +331,13 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; +/* + * only for GT debugfs files which are valid on VF. Not valid on PF. 
+ */ +static const struct drm_info_list vf_only_debugfs_list[] = { + {"sa_info_vf_ccs", .show = xe_gt_debugfs_simple_show, .data = sa_info_vf_ccs}, +}; + /* everything else should be added here */ static const struct drm_info_list pf_only_debugfs_list[] = { {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, @@ -388,13 +423,18 @@ void xe_gt_debugfs_register(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); struct drm_minor *minor = gt_to_xe(gt)->drm.primary; + struct dentry *parent = gt->tile->debugfs; struct dentry *root; + char symlink[16]; char name[8]; xe_gt_assert(gt, minor->debugfs_root); + if (IS_ERR(parent)) + return; + snprintf(name, sizeof(name), "gt%d", gt->info.id); - root = debugfs_create_dir(name, minor->debugfs_root); + root = debugfs_create_dir(name, parent); if (IS_ERR(root)) { drm_warn(&xe->drm, "Create GT directory failed"); return; @@ -419,6 +459,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt) drm_debugfs_create_files(pf_only_debugfs_list, ARRAY_SIZE(pf_only_debugfs_list), root, minor); + else + drm_debugfs_create_files(vf_only_debugfs_list, + ARRAY_SIZE(vf_only_debugfs_list), + root, minor); + xe_uc_debugfs_register(>->uc, root); @@ -426,4 +471,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt) xe_gt_sriov_pf_debugfs_register(gt, root); else if (IS_SRIOV_VF(xe)) xe_gt_sriov_vf_debugfs_register(gt, root); + + /* + * Backwards compatibility only: create a link for the legacy clients + * who may expect gt/ directory at the root level, not the tile level. + */ + snprintf(symlink, sizeof(symlink), "tile%u/%s", gt->tile->id, name); + debugfs_create_symlink(name, minor->debugfs_root, symlink); } diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index ffb210216aa9..f8950a52d0a4 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -322,15 +322,11 @@ static void gt_idle_fini(void *arg) { struct kobject *kobj = arg; struct xe_gt *gt = kobj_to_gt(kobj->parent); - unsigned int fw_ref; xe_gt_idle_disable_pg(gt); - if (gt_to_xe(gt)->info.skip_guc_pc) { - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (gt_to_xe(gt)->info.skip_guc_pc) xe_gt_idle_disable_c6(gt); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - } sysfs_remove_files(kobj, gt_idle_attrs); kobject_put(kobj); @@ -390,14 +386,23 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE); } -void xe_gt_idle_disable_c6(struct xe_gt *gt) +int xe_gt_idle_disable_c6(struct xe_gt *gt) { + unsigned int fw_ref; + xe_device_assert_mem_access(gt_to_xe(gt)); - xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); if (IS_SRIOV_VF(gt_to_xe(gt))) - return; + return 0; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; xe_mmio_write32(>->mmio, RC_CONTROL, 0); xe_mmio_write32(>->mmio, RC_STATE, 0); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 591a01e181bc..9c34a155e102 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -13,7 +13,7 @@ struct xe_gt; int xe_gt_idle_init(struct xe_gt_idle *gtidle); void xe_gt_idle_enable_c6(struct xe_gt *gt); -void xe_gt_idle_disable_c6(struct xe_gt *gt); +int xe_gt_idle_disable_c6(struct xe_gt *gt); void xe_gt_idle_enable_pg(struct xe_gt *gt); void xe_gt_idle_disable_pg(struct xe_gt *gt); int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c 
b/drivers/gpu/drm/xe/xe_gt_mcr.c index 64a2f0d6aaf9..683ac021a06d 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -46,8 +46,6 @@ * MCR registers are not available on Virtual Function (VF). */ -#define STEER_SEMAPHORE XE_REG(0xFD0) - static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr) { return reg_mcr.__reg; @@ -533,7 +531,7 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) | REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2); - xe_mmio_write32(>->mmio, MCFG_MCR_SELECTOR, steer_val); + xe_mmio_write32(>->mmio, STEER_SEMAPHORE, steer_val); xe_mmio_write32(>->mmio, SF_MCR_SELECTOR, steer_val); /* * For GAM registers, all reads should be directed to instance 1 diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 5a75d56d8558..d02d22fb3659 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -16,13 +16,13 @@ #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_gt_stats.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_migrate.h" #include "xe_svm.h" #include "xe_trace_bo.h" #include "xe_vm.h" +#include "xe_vram_types.h" struct pagefault { u64 page_addr; @@ -74,7 +74,7 @@ static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) } static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, - bool atomic, unsigned int id) + bool need_vram_move, struct xe_vram_region *vram) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -84,24 +84,13 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, if (err) return err; - if (atomic && IS_DGFX(vm->xe)) { - if (xe_vma_is_userptr(vma)) { - err = -EACCES; - return err; - } + if (!bo) + return 0; - /* Migrate to VRAM, move should invalidate the VMA first */ - err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); - if (err) - return err; - } else if (bo) { - /* Create backing store if needed */ - err = xe_bo_validate(bo, vm, true); - if (err) - return err; - } + err = need_vram_move ? xe_bo_migrate(bo, vram->placement) : + xe_bo_validate(bo, vm, true); - return 0; + return err; } static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, @@ -112,10 +101,14 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, struct drm_exec exec; struct dma_fence *fence; ktime_t end = 0; - int err; + int err, needs_vram; lockdep_assert_held_write(&vm->lock); + needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); + if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma))) + return needs_vram < 0 ? 
needs_vram : -EACCES; + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); @@ -138,7 +131,7 @@ retry_userptr: /* Lock VM and BOs dma-resv */ drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - err = xe_pf_begin(&exec, vma, atomic, tile->id); + err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram); drm_exec_retry_on_contention(&exec); if (xe_vm_validate_should_retry(&exec, err, &end)) err = -EAGAIN; @@ -573,7 +566,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) /* Lock VM and BOs dma-resv */ drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - ret = xe_pf_begin(&exec, vma, true, tile->id); + ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram); drm_exec_retry_on_contention(&exec); if (ret) break; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index bdbd15f3afe3..c4dda87b47cc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -55,7 +55,12 @@ static void pf_init_workers(struct xe_gt *gt) static void pf_fini_workers(struct xe_gt *gt) { xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); - disable_work_sync(>->sriov.pf.workers.restart); + + if (disable_work_sync(>->sriov.pf.workers.restart)) { + xe_gt_sriov_dbg_verbose(gt, "pending restart disabled!\n"); + /* release an rpm reference taken on the worker's behalf */ + xe_pm_runtime_put(gt_to_xe(gt)); + } } /** @@ -207,8 +212,11 @@ static void pf_cancel_restart(struct xe_gt *gt) { xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); - if (cancel_work_sync(>->sriov.pf.workers.restart)) + if (cancel_work_sync(>->sriov.pf.workers.restart)) { xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); + /* release an rpm reference taken on the worker's behalf */ + xe_pm_runtime_put(gt_to_xe(gt)); + } } /** @@ -226,9 +234,12 @@ static void pf_restart(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - xe_pm_runtime_get(xe); + xe_gt_assert(gt, !xe_pm_runtime_suspended(xe)); + xe_gt_sriov_pf_config_restart(gt); xe_gt_sriov_pf_control_restart(gt); + + /* release an rpm reference taken on our behalf */ xe_pm_runtime_put(xe); xe_gt_sriov_dbg(gt, "restart completed\n"); @@ -247,8 +258,13 @@ static void pf_queue_restart(struct xe_gt *gt) xe_gt_assert(gt, IS_SRIOV_PF(xe)); - if (!queue_work(xe->sriov.wq, >->sriov.pf.workers.restart)) + /* take an rpm reference on behalf of the worker */ + xe_pm_runtime_get_noresume(xe); + + if (!queue_work(xe->sriov.wq, >->sriov.pf.workers.restart)) { xe_gt_sriov_dbg(gt, "restart already in queue!\n"); + xe_pm_runtime_put(xe); + } } /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 494909f74eb2..c8f0320d032f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -33,6 +33,7 @@ #include "xe_migrate.h" #include "xe_sriov.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" #include "xe_wopcm.h" #define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) @@ -1433,7 +1434,8 @@ fail: return err; } -static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) +/* Return: %true if there was an LMEM provisioned, %false otherwise */ +static bool pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); xe_gt_assert(gt, xe_gt_is_main_type(gt)); @@ -1442,7 +1444,9 @@ static void 
pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_confi if (config->lmem_obj) { xe_bo_unpin_map_no_vm(config->lmem_obj); config->lmem_obj = NULL; + return true; } + return false; } static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1604,7 +1608,7 @@ static u64 pf_query_free_lmem(struct xe_gt *gt) { struct xe_tile *tile = gt->tile; - return xe_ttm_vram_get_avail(&tile->mem.vram.ttm.manager); + return xe_ttm_vram_get_avail(&tile->mem.vram->ttm.manager); } static u64 pf_query_max_lmem(struct xe_gt *gt) @@ -2020,12 +2024,13 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_device *xe = gt_to_xe(gt); + bool released; if (xe_gt_is_main_type(gt)) { pf_release_vf_config_ggtt(gt, config); if (IS_DGFX(xe)) { - pf_release_vf_config_lmem(gt, config); - if (xe_device_has_lmtt(xe)) + released = pf_release_vf_config_lmem(gt, config); + if (released && xe_device_has_lmtt(xe)) pf_update_vf_lmtt(xe, vfid); } } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index b282838d59e6..0461d5513487 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -25,6 +25,7 @@ #include "xe_guc.h" #include "xe_guc_hxg_helpers.h" #include "xe_guc_relay.h" +#include "xe_lrc.h" #include "xe_mmio.h" #include "xe_sriov.h" #include "xe_sriov_vf.h" @@ -751,6 +752,19 @@ failed: } /** + * xe_gt_sriov_vf_default_lrcs_hwsp_rebase - Update GGTT references in HWSP of default LRCs. + * @gt: the &xe_gt struct instance + */ +void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + xe_default_lrc_update_memirq_regs_with_address(hwe); +} + +/** * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, * or just mark that a GuC is ready for it. * @gt: the &xe_gt struct instance linked to target GuC diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index e0357f341a2d..0af1dc769fe0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -21,6 +21,7 @@ void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); +void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt); int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c deleted file mode 100644 index 086c12ee3d9d..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ /dev/null @@ -1,596 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include "xe_gt_tlb_invalidation.h" - -#include "abi/guc_actions_abi.h" -#include "xe_device.h" -#include "xe_force_wake.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_gt_stats.h" -#include "xe_mmio.h" -#include "xe_pm.h" -#include "xe_sriov.h" -#include "xe_trace.h" -#include "regs/xe_guc_regs.h" - -#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS - -/* - * TLB inval depends on pending commands in the CT queue and then the real - * invalidation time. Double up the time to process full CT queue - * just to be on the safe side. 
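Back in the xe_gt_sriov_pf.c hunks above, the queued restart worker now owns a runtime-PM reference: pf_queue_restart() takes it (noresume) before queuing, and exactly one of the worker, pf_cancel_restart() or pf_fini_workers() drops it. A standalone sketch of that ownership pattern, with a plain counter and booleans standing in for xe_pm_runtime_*() and the kernel workqueue API:

    #include <stdbool.h>
    #include <stdio.h>

    static int rpm_refs;
    static bool work_pending;

    static void rpm_get(void) { rpm_refs++; }
    static void rpm_put(void) { rpm_refs--; }

    /* Mimics queue_work(): returns false if the item was already pending. */
    static bool queue_work_sketch(void)
    {
            bool was_pending = work_pending;

            work_pending = true;
            return !was_pending;
    }

    /* Mimics cancel_work_sync(): returns true if a pending item was cancelled. */
    static bool cancel_work_sketch(void)
    {
            bool was_pending = work_pending;

            work_pending = false;
            return was_pending;
    }

    static void queue_restart(void)
    {
            rpm_get();                      /* reference taken on the worker's behalf */
            if (!queue_work_sketch())
                    rpm_put();              /* already queued: drop the extra reference */
    }

    static void restart_worker(void)
    {
            work_pending = false;
            /* ... perform the restart ... */
            rpm_put();                      /* release the reference taken at queue time */
    }

    static void cancel_restart(void)
    {
            if (cancel_work_sketch())
                    rpm_put();              /* worker will never run: drop its reference */
    }

    int main(void)
    {
            queue_restart();                /* reference held, work pending */
            queue_restart();                /* duplicate queue attempt must not leak a ref */
            restart_worker();               /* worker runs and drops its reference */

            queue_restart();                /* queue again ... */
            cancel_restart();               /* ... and cancel before it runs */

            printf("refs=%d\n", rpm_refs);  /* 0: balanced on both paths */
            return 0;
    }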
- */ -static long tlb_timeout_jiffies(struct xe_gt *gt) -{ - /* this reflects what HW/GuC needs to process TLB inv request */ - const long hw_tlb_timeout = HZ / 4; - - /* this estimates actual delay caused by the CTB transport */ - long delay = xe_guc_ct_queue_proc_time_jiffies(>->uc.guc.ct); - - return hw_tlb_timeout + 2 * delay; -} - -static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) -{ - if (WARN_ON_ONCE(!fence->gt)) - return; - - xe_pm_runtime_put(gt_to_xe(fence->gt)); - fence->gt = NULL; /* fini() should be called once */ -} - -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); - - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - xe_gt_tlb_invalidation_fence_fini(fence); - dma_fence_signal(&fence->base); - if (!stack) - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - -void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) -{ - if (WARN_ON_ONCE(!fence->gt)) - return; - - __invalidation_fence_signal(gt_to_xe(fence->gt), fence); -} - -static void xe_gt_tlb_fence_timeout(struct work_struct *work) -{ - struct xe_gt *gt = container_of(work, struct xe_gt, - tlb_invalidation.fence_tdr.work); - struct xe_device *xe = gt_to_xe(gt); - struct xe_gt_tlb_invalidation_fence *fence, *next; - - LNL_FLUSH_WORK(>->uc.guc.ct.g2h_worker); - - spin_lock_irq(>->tlb_invalidation.pending_lock); - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - s64 since_inval_ms = ktime_ms_delta(ktime_get(), - fence->invalidation_time); - - if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt)) - break; - - trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence); - xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", - fence->seqno, gt->tlb_invalidation.seqno_recv); - - fence->base.error = -ETIME; - invalidation_fence_signal(xe, fence); - } - if (!list_empty(>->tlb_invalidation.pending_fences)) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - spin_unlock_irq(>->tlb_invalidation.pending_lock); -} - -/** - * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state - * @gt: GT structure - * - * Initialize GT TLB invalidation state, purely software initialization, should - * be called once during driver load. - * - * Return: 0 on success, negative error code on error. - */ -int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt) -{ - gt->tlb_invalidation.seqno = 1; - INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); - spin_lock_init(>->tlb_invalidation.pending_lock); - spin_lock_init(>->tlb_invalidation.lock); - INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, - xe_gt_tlb_fence_timeout); - - return 0; -} - -/** - * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset - * @gt: GT structure - * - * Signal any pending invalidation fences, should be called during a GT reset - */ -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) -{ - struct xe_gt_tlb_invalidation_fence *fence, *next; - int pending_seqno; - - /* - * we can get here before the CTs are even initialized if we're wedging - * very early, in which case there are not going to be any pending - * fences so we can bail immediately. 
- */ - if (!xe_guc_ct_initialized(>->uc.guc.ct)) - return; - - /* - * CT channel is already disabled at this point. No new TLB requests can - * appear. - */ - - mutex_lock(>->uc.guc.ct.lock); - spin_lock_irq(>->tlb_invalidation.pending_lock); - cancel_delayed_work(>->tlb_invalidation.fence_tdr); - /* - * We might have various kworkers waiting for TLB flushes to complete - * which are not tracked with an explicit TLB fence, however at this - * stage that will never happen since the CT is already disabled, so - * make sure we signal them here under the assumption that we have - * completed a full GT reset. - */ - if (gt->tlb_invalidation.seqno == 1) - pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; - else - pending_seqno = gt->tlb_invalidation.seqno - 1; - WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) - invalidation_fence_signal(gt_to_xe(gt), fence); - spin_unlock_irq(>->tlb_invalidation.pending_lock); - mutex_unlock(>->uc.guc.ct.lock); -} - -static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) -{ - int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv); - - if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) - return false; - - if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) - return true; - - return seqno_recv >= seqno; -} - -static int send_tlb_invalidation(struct xe_guc *guc, - struct xe_gt_tlb_invalidation_fence *fence, - u32 *action, int len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - int seqno; - int ret; - - xe_gt_assert(gt, fence); - - /* - * XXX: The seqno algorithm relies on TLB invalidation being processed - * in order which they currently are, if that changes the algorithm will - * need to be updated. - */ - - mutex_lock(&guc->ct.lock); - seqno = gt->tlb_invalidation.seqno; - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - action[1] = seqno; - ret = xe_guc_ct_send_locked(&guc->ct, action, len, - G2H_LEN_DW_TLB_INVALIDATE, 1); - if (!ret) { - spin_lock_irq(>->tlb_invalidation.pending_lock); - /* - * We haven't actually published the TLB fence as per - * pending_fences, but in theory our seqno could have already - * been written as we acquired the pending_lock. In such a case - * we can just go ahead and signal the fence here. 
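The tlb_invalidation_seqno_past() helper above is the wraparound-safe ordering test: seqnos live in [1, TLB_INVALIDATION_SEQNO_MAX - 1] and wrap back to 1, so a plain ">=" comparison breaks once the counter wraps, and the helper instead compares the signed distance against half the window. A standalone sketch of the same comparison:

    #include <stdbool.h>
    #include <stdio.h>

    #define SEQNO_MAX 0x100000

    static bool seqno_past(int seqno, int seqno_recv)
    {
            if (seqno - seqno_recv < -(SEQNO_MAX / 2))
                    return false;   /* seqno is far "ahead": not completed yet */

            if (seqno - seqno_recv > (SEQNO_MAX / 2))
                    return true;    /* receiver already wrapped past seqno: completed */

            return seqno_recv >= seqno;
    }

    int main(void)
    {
            /* Normal case: last received seqno is ahead of the fence's seqno. */
            printf("%d\n", seqno_past(10, 12));             /* 1 */
            /* Wrap case: fence seqno near the top, receiver already wrapped. */
            printf("%d\n", seqno_past(SEQNO_MAX - 2, 5));   /* 1 */
            /* Not yet: receiver still behind the fence's seqno. */
            printf("%d\n", seqno_past(12, 10));             /* 0 */
            return 0;
    }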
- */ - if (tlb_invalidation_seqno_past(gt, seqno)) { - __invalidation_fence_signal(xe, fence); - } else { - fence->invalidation_time = ktime_get(); - list_add_tail(&fence->link, - >->tlb_invalidation.pending_fences); - - if (list_is_singular(>->tlb_invalidation.pending_fences)) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - } - spin_unlock_irq(>->tlb_invalidation.pending_lock); - } else { - __invalidation_fence_signal(xe, fence); - } - if (!ret) { - gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % - TLB_INVALIDATION_SEQNO_MAX; - if (!gt->tlb_invalidation.seqno) - gt->tlb_invalidation.seqno = 1; - } - mutex_unlock(&guc->ct.lock); - xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); - - return ret; -} - -#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ - XE_GUC_TLB_INVAL_FLUSH_CACHE) - -/** - * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion - * - * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use the invalidation fence to wait for completion. - * - * Return: 0 on success, negative error code on error - */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) -{ - u32 action[] = { - XE_GUC_ACTION_TLB_INVALIDATION, - 0, /* seqno, replaced in send_tlb_invalidation */ - MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), - }; - int ret; - - ret = send_tlb_invalidation(>->uc.guc, fence, action, - ARRAY_SIZE(action)); - /* - * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches - * should be nuked on a GT reset so this error can be ignored. - */ - if (ret == -ECANCELED) - return 0; - - return ret; -} - -/** - * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT - * @gt: GT structure - * - * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is - * synchronous. 
- * - * Return: 0 on success, negative error code on error - */ -int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; - - if (xe_guc_ct_enabled(>->uc.guc.ct) && - gt->uc.guc.submission_state.enabled) { - struct xe_gt_tlb_invalidation_fence fence; - int ret; - - xe_gt_tlb_invalidation_fence_init(gt, &fence, true); - ret = xe_gt_tlb_invalidation_guc(gt, &fence); - if (ret) - return ret; - - xe_gt_tlb_invalidation_fence_wait(&fence); - } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { - struct xe_mmio *mmio = >->mmio; - - if (IS_SRIOV_VF(xe)) - return 0; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { - xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, - PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, - PVC_GUC_TLB_INV_DESC0_VALID); - } else { - xe_mmio_write32(mmio, GUC_TLB_INV_CR, - GUC_TLB_INV_CR_INVALIDATE); - } - xe_force_wake_put(gt_to_fw(gt), fw_ref); - } - - return 0; -} - -static int send_tlb_invalidation_all(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) -{ - u32 action[] = { - XE_GUC_ACTION_TLB_INVALIDATION_ALL, - 0, /* seqno, replaced in send_tlb_invalidation */ - MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), - }; - - return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); -} - -/** - * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs. - * @gt: the &xe_gt structure - * @fence: the &xe_gt_tlb_invalidation_fence to be signaled on completion - * - * Send a request to invalidate all TLBs across PF and all VFs. - * - * Return: 0 on success, negative error code on error - */ -int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence) -{ - int err; - - xe_gt_assert(gt, gt == fence->gt); - - err = send_tlb_invalidation_all(gt, fence); - if (err) - xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err)); - - return err; -} - -/* - * Ensure that roundup_pow_of_two(length) doesn't overflow. - * Note that roundup_pow_of_two() operates on unsigned long, - * not on u64. - */ -#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) - -/** - * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an - * address range - * - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion - * @start: start address - * @end: end address - * @asid: address space id - * - * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can use - * the invalidation fence to wait for completion. 
- * - * Return: Negative error code on error, 0 on success - */ -int xe_gt_tlb_invalidation_range(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - u64 start, u64 end, u32 asid) -{ - struct xe_device *xe = gt_to_xe(gt); -#define MAX_TLB_INVALIDATION_LEN 7 - u32 action[MAX_TLB_INVALIDATION_LEN]; - u64 length = end - start; - int len = 0; - - xe_gt_assert(gt, fence); - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) { - __invalidation_fence_signal(xe, fence); - return 0; - } - - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; - action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation || - length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); - } else { - u64 orig_start = start; - u64 align; - - if (length < SZ_4K) - length = SZ_4K; - - /* - * We need to invalidate a higher granularity if start address - * is not aligned to length. When start is not aligned with - * length we need to find the length large enough to create an - * address mask covering the required range. - */ - align = roundup_pow_of_two(length); - start = ALIGN_DOWN(start, align); - end = ALIGN(end, align); - length = align; - while (start + length < end) { - length <<= 1; - start = ALIGN_DOWN(orig_start, length); - } - - /* - * Minimum invalidation size for a 2MB page that the hardware - * expects is 16MB - */ - if (length >= SZ_2M) { - length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(orig_start, length); - } - - xe_gt_assert(gt, length >= SZ_4K); - xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, - ilog2(SZ_2M) + 1))); - xe_gt_assert(gt, IS_ALIGNED(start, length)); - - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = asid; - action[len++] = lower_32_bits(start); - action[len++] = upper_32_bits(start); - action[len++] = ilog2(length) - ilog2(SZ_4K); - } - - xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); - - return send_tlb_invalidation(>->uc.guc, fence, action, len); -} - -/** - * xe_gt_tlb_invalidation_vm - Issue a TLB invalidation on this GT for a VM - * @gt: graphics tile - * @vm: VM to invalidate - * - * Invalidate entire VM's address space - */ -void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm) -{ - struct xe_gt_tlb_invalidation_fence fence; - u64 range = 1ull << vm->xe->info.va_bits; - int ret; - - xe_gt_tlb_invalidation_fence_init(gt, &fence, true); - - ret = xe_gt_tlb_invalidation_range(gt, &fence, 0, range, vm->usm.asid); - if (ret < 0) - return; - - xe_gt_tlb_invalidation_fence_wait(&fence); -} - -/** - * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler - * @guc: guc - * @msg: message indicating TLB invalidation done - * @len: length of message - * - * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any - * invalidation fences for seqno. Algorithm for this depends on seqno being - * received in-order and asserts this assumption. - * - * Return: 0 on success, -EPROTO for malformed messages. - */ -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - struct xe_gt_tlb_invalidation_fence *fence, *next; - unsigned long flags; - - if (unlikely(len != 1)) - return -EPROTO; - - /* - * This can also be run both directly from the IRQ handler and also in - * process_g2h_msg(). 
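The range-based invalidation code above picks a power-of-two, self-aligned window covering [start, end): round the length up to a power of two, keep doubling until a window of that size aligned down from the original start spans the whole range, then bump it to at least 16M once the window reaches 2M (the minimum the hardware expects for 2M pages). A standalone sketch of that address-mask computation (helper names are local to the sketch, not driver API):

    #include <stdint.h>
    #include <stdio.h>

    #define SZ_4K   0x1000ULL
    #define SZ_2M   0x200000ULL
    #define SZ_16M  0x1000000ULL

    static uint64_t roundup_pow_of_two_u64(uint64_t v)
    {
            uint64_t r = 1;

            while (r < v)
                    r <<= 1;
            return r;
    }

    static uint64_t align_down(uint64_t x, uint64_t a) { return x & ~(a - 1); }
    static uint64_t align_up(uint64_t x, uint64_t a)   { return (x + a - 1) & ~(a - 1); }

    static void pick_inval_window(uint64_t start, uint64_t end,
                                  uint64_t *win_start, uint64_t *win_len)
    {
            uint64_t orig_start = start;
            uint64_t length = end - start;
            uint64_t align;

            if (length < SZ_4K)
                    length = SZ_4K;

            /* Grow to a self-aligned power-of-two window covering [start, end). */
            align = roundup_pow_of_two_u64(length);
            start = align_down(start, align);
            end = align_up(end, align);
            length = align;
            while (start + length < end) {
                    length <<= 1;
                    start = align_down(orig_start, length);
            }

            /* 2M pages need at least a 16M invalidation window. */
            if (length >= SZ_2M && length < SZ_16M) {
                    length = SZ_16M;
                    start = align_down(orig_start, length);
            }

            *win_start = start;
            *win_len = length;
    }

    int main(void)
    {
            uint64_t s, l;

            /* A small range straddling a 2M boundary blows up to a 16M window. */
            pick_inval_window(0x1ff000, 0x201000, &s, &l);
            printf("start=0x%llx length=0x%llx\n",
                   (unsigned long long)s, (unsigned long long)l);
            return 0;
    }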
Only one may process any individual CT message, - * however the order they are processed here could result in skipping a - * seqno. To handle that we just process all the seqnos from the last - * seqno_recv up to and including the one in msg[0]. The delta should be - * very small so there shouldn't be much of pending_fences we actually - * need to iterate over here. - * - * From GuC POV we expect the seqnos to always appear in-order, so if we - * see something later in the timeline we can be sure that anything - * appearing earlier has already signalled, just that we have yet to - * officially process the CT message like if racing against - * process_g2h_msg(). - */ - spin_lock_irqsave(>->tlb_invalidation.pending_lock, flags); - if (tlb_invalidation_seqno_past(gt, msg[0])) { - spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); - return 0; - } - - WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - trace_xe_gt_tlb_invalidation_fence_recv(xe, fence); - - if (!tlb_invalidation_seqno_past(gt, fence->seqno)) - break; - - invalidation_fence_signal(xe, fence); - } - - if (!list_empty(>->tlb_invalidation.pending_fences)) - mod_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - else - cancel_delayed_work(>->tlb_invalidation.fence_tdr); - - spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); - - return 0; -} - -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - -/** - * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence - * @gt: GT - * @fence: TLB invalidation fence to initialize - * @stack: fence is stack variable - * - * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini - * will be automatically called when fence is signalled (all fences must signal), - * even on error. 
- */ -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - bool stack) -{ - xe_pm_runtime_get_noresume(gt_to_xe(gt)); - - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&fence->base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - INIT_LIST_HEAD(&fence->link); - if (stack) - set_bit(FENCE_STACK_BIT, &fence->base.flags); - else - dma_fence_get(&fence->base); - fence->gt = gt; -} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h deleted file mode 100644 index f7f0f2eaf4b5..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_TLB_INVALIDATION_H_ -#define _XE_GT_TLB_INVALIDATION_H_ - -#include <linux/types.h> - -#include "xe_gt_tlb_invalidation_types.h" - -struct xe_gt; -struct xe_guc; -struct xe_vm; -struct xe_vma; - -int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); - -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); -void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); -int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence); -int xe_gt_tlb_invalidation_range(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - u64 start, u64 end, u32 asid); -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); - -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - bool stack); -void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence); - -static inline void -xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) -{ - dma_fence_wait(&fence->base, false); -} - -#endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h deleted file mode 100644 index de6e825e0851..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_ -#define _XE_GT_TLB_INVALIDATION_TYPES_H_ - -#include <linux/dma-fence.h> - -struct xe_gt; - -/** - * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence - * - * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB - * invalidation completion. - */ -struct xe_gt_tlb_invalidation_fence { - /** @base: dma fence base */ - struct dma_fence base; - /** @gt: GT which fence belong to */ - struct xe_gt *gt; - /** @link: link into list of pending tlb fences */ - struct list_head link; - /** @seqno: seqno of TLB invalidation to signal fence one */ - int seqno; - /** @invalidation_time: time of TLB invalidation */ - ktime_t invalidation_time; -}; - -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 8c63e3263643..a0baa560dd71 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -138,7 +138,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) * but there's no tracking number assigned yet so we use a custom * OOB workaround descriptor. 
*/ - if (XE_WA(gt, no_media_l3)) + if (XE_GT_WA(gt, no_media_l3)) return; if (GRAPHICS_VER(xe) >= 30) { diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 96344c604726..66158105aca5 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -17,6 +17,7 @@ #include "xe_oa_types.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" +#include "xe_tlb_inval_types.h" #include "xe_uc_types.h" struct xe_exec_queue_ops; @@ -185,34 +186,8 @@ struct xe_gt { struct work_struct worker; } reset; - /** @tlb_invalidation: TLB invalidation state */ - struct { - /** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */ -#define TLB_INVALIDATION_SEQNO_MAX 0x100000 - int seqno; - /** - * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno, - * protected by CT lock - */ - int seqno_recv; - /** - * @tlb_invalidation.pending_fences: list of pending fences waiting TLB - * invaliations, protected by CT lock - */ - struct list_head pending_fences; - /** - * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences - * and updating @tlb_invalidation.seqno_recv. - */ - spinlock_t pending_lock; - /** - * @tlb_invalidation.fence_tdr: schedules a delayed call to - * xe_gt_tlb_fence_timeout after the timeut interval is over. - */ - struct delayed_work fence_tdr; - /** @tlb_invalidation.lock: protects TLB invalidation fences */ - spinlock_t lock; - } tlb_invalidation; + /** @tlb_inval: TLB invalidation state */ + struct xe_tlb_inval tlb_inval; /** * @ccs_mode: Number of compute engines enabled. @@ -411,7 +386,7 @@ struct xe_gt { unsigned long *oob; /** * @wa_active.oob_initialized: mark oob as initialized to help - * detecting misuse of XE_WA() - it can only be called on + * detecting misuse of XE_GT_WA() - it can only be called on * initialization after OOB WAs have being processed */ bool oob_initialized; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index b1d1d6da3758..37d06c51180c 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -16,6 +16,7 @@ #include "regs/xe_guc_regs.h" #include "regs/xe_irq_regs.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" @@ -81,11 +82,15 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc) static u32 guc_ctl_feature_flags(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); u32 flags = GUC_CTL_ENABLE_LITE_RESTORE; - if (!guc_to_xe(guc)->info.skip_guc_pc) + if (!xe->info.skip_guc_pc) flags |= GUC_CTL_ENABLE_SLPC; + if (xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev))) + flags |= GUC_CTL_ENABLE_PSMI_LOGGING; + return flags; } @@ -157,7 +162,7 @@ static bool needs_wa_dual_queue(struct xe_gt *gt) * on RCS and CCSes with different address spaces, which on DG2 is * required as a WA for an HW bug. 
*/ - if (XE_WA(gt, 22011391025)) + if (XE_GT_WA(gt, 22011391025)) return true; /* @@ -184,10 +189,10 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); u32 flags = 0; - if (XE_WA(gt, 22012773006)) + if (XE_GT_WA(gt, 22012773006)) flags |= GUC_WA_POLLCS; - if (XE_WA(gt, 14014475959)) + if (XE_GT_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; if (needs_wa_dual_queue(gt)) @@ -201,19 +206,22 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (GRAPHICS_VERx100(xe) < 1270) flags |= GUC_WA_PRE_PARSER; - if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) + if (XE_GT_WA(gt, 22012727170) || XE_GT_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; - if (XE_WA(gt, 18020744125) && + if (XE_GT_WA(gt, 18020744125) && !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; - if (XE_WA(gt, 1509372804)) + if (XE_GT_WA(gt, 1509372804)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; - if (XE_WA(gt, 14018913170)) + if (XE_GT_WA(gt, 14018913170)) flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; + if (XE_GT_WA(gt, 16023683509)) + flags |= GUC_WA_SAVE_RESTORE_MCFG_REG_AT_MC6; + return flags; } @@ -992,11 +1000,14 @@ static int guc_load_done(u32 status) case XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH: case XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE: case XE_GUC_LOAD_STATUS_HWCONFIG_ERROR: + case XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH: case XE_GUC_LOAD_STATUS_DPC_ERROR: case XE_GUC_LOAD_STATUS_EXCEPTION: case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: case XE_GUC_LOAD_STATUS_MPU_DATA_INVALID: case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: return -1; } @@ -1136,17 +1147,29 @@ static void guc_wait_ucode(struct xe_guc *guc) } switch (ukernel) { + case XE_GUC_LOAD_STATUS_HWCONFIG_START: + xe_gt_err(gt, "still extracting hwconfig table.\n"); + break; + case XE_GUC_LOAD_STATUS_EXCEPTION: xe_gt_err(gt, "firmware exception. 
EIP: %#x\n", xe_mmio_read32(mmio, SOFT_SCRATCH(13))); break; + case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: + xe_gt_err(gt, "illegal init/ADS data\n"); + break; + case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: xe_gt_err(gt, "illegal register in save/restore workaround list\n"); break; - case XE_GUC_LOAD_STATUS_HWCONFIG_START: - xe_gt_err(gt, "still extracting hwconfig table.\n"); + case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + xe_gt_err(gt, "illegal workaround KLV data\n"); + break; + + case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: + xe_gt_err(gt, "illegal feature flag specified\n"); break; } diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 131cfc56be00..5631722f34f5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -247,7 +247,7 @@ static size_t calculate_regset_size(struct xe_gt *gt) count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; - if (XE_WA(gt, 1607983814)) + if (XE_GT_WA(gt, 1607983814)) count += LNCFCMOCS_REG_COUNT; return count * sizeof(struct guc_mmio_reg); @@ -284,52 +284,26 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) return total_size; } -static void guc_waklv_enable_one_word(struct xe_guc_ads *ads, - enum xe_guc_klv_ids klv_id, - u32 value, - u32 *offset, u32 *remain) +static void guc_waklv_enable(struct xe_guc_ads *ads, + u32 data[], u32 data_len_dw, + u32 *offset, u32 *remain, + enum xe_guc_klv_ids klv_id) { - u32 size; - u32 klv_entry[] = { - /* 16:16 key/length */ - FIELD_PREP(GUC_KLV_0_KEY, klv_id) | - FIELD_PREP(GUC_KLV_0_LEN, 1), - value, - /* 1 dword data */ - }; - - size = sizeof(klv_entry); + size_t size = sizeof(u32) * (1 + data_len_dw); if (*remain < size) { drm_warn(&ads_to_xe(ads)->drm, - "w/a klv buffer too small to add klv id %d\n", klv_id); - } else { - xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, - klv_entry, size); - *offset += size; - *remain -= size; + "w/a klv buffer too small to add klv id 0x%04X\n", klv_id); + return; } -} -static void guc_waklv_enable_simple(struct xe_guc_ads *ads, - enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain) -{ - u32 klv_entry[] = { - /* 16:16 key/length */ - FIELD_PREP(GUC_KLV_0_KEY, klv_id) | - FIELD_PREP(GUC_KLV_0_LEN, 0), - /* 0 dwords data */ - }; - u32 size; + /* 16:16 key/length */ + xe_map_wr(ads_to_xe(ads), ads_to_map(ads), *offset, u32, + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | FIELD_PREP(GUC_KLV_0_LEN, data_len_dw)); + /* data_len_dw dwords of data */ + xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), + *offset + sizeof(u32), data, data_len_dw * sizeof(u32)); - size = sizeof(klv_entry); - - if (xe_gt_WARN(ads_to_gt(ads), *remain < size, - "w/a klv buffer too small to add klv id %d\n", klv_id)) - return; - - xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, - klv_entry, size); *offset += size; *remain -= size; } @@ -343,44 +317,51 @@ static void guc_waklv_init(struct xe_guc_ads *ads) offset = guc_ads_waklv_offset(ads); remain = guc_ads_waklv_size(ads); - if (XE_WA(gt, 14019882105) || XE_WA(gt, 16021333562)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, - &offset, &remain); - if (XE_WA(gt, 18024947630)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING, - &offset, &remain); - if (XE_WA(gt, 16022287689)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, - &offset, &remain); - - if (XE_WA(gt, 14022866841)) - guc_waklv_enable_simple(ads, - 
GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO, - &offset, &remain); + if (XE_GT_WA(gt, 14019882105) || XE_GT_WA(gt, 16021333562)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); + if (XE_GT_WA(gt, 18024947630)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING); + if (XE_GT_WA(gt, 16022287689)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE); + + if (XE_GT_WA(gt, 14022866841)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO); /* * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, * the default value for this register is determined to be 0xC40. This could change in the * future, so GuC depends on KMD to send it the correct value. */ - if (XE_WA(gt, 13011645652)) - guc_waklv_enable_one_word(ads, - GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE, - 0xC40, - &offset, &remain); - - if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, - &offset, &remain); - - if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708)) - guc_waklv_enable_simple(ads, - GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH, - &offset, &remain); + if (XE_GT_WA(gt, 13011645652)) { + u32 data = 0xC40; + + guc_waklv_enable(ads, &data, sizeof(data) / sizeof(u32), &offset, &remain, + GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE); + } + + if (XE_GT_WA(gt, 14022293748) || XE_GT_WA(gt, 22019794406)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET); + + if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_GT_WA(gt, 16026508708)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH); + if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 47, 0) && XE_GT_WA(gt, 16026007364)) { + u32 data[] = { + 0x0, + 0xF, + }; + guc_waklv_enable(ads, data, sizeof(data) / sizeof(u32), &offset, &remain, + GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG); + } + + if (XE_GT_WA(gt, 14020001231)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT); size = guc_ads_waklv_size(ads) - remain; if (!size) @@ -784,7 +765,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); } - if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_GT_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, XELP_LNCFCMOCS(i), count++); diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c index 14a07dca48e7..502ca3a4ee60 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.c +++ b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -164,7 +164,7 @@ u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *p if (offset < 0 || offset + size > cache->sam->base.size) return 0; - return cache->sam->gpu_addr + offset; + return xe_sa_manager_gpu_addr(cache->sam) + offset; } #if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 3f4e6a46ff16..848065a25c44 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ 
b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -26,11 +26,11 @@ #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_sriov_printk.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_log.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" +#include "xe_guc_tlb_inval.h" #include "xe_map.h" #include "xe_pm.h" #include "xe_trace_guc.h" @@ -1416,8 +1416,7 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_pagefault_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: - ret = xe_guc_tlb_invalidation_done_handler(guc, payload, - adj_len); + ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: ret = xe_guc_access_counter_notify_handler(guc, payload, @@ -1618,8 +1617,7 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: __g2h_release_space(ct, len); - ret = xe_guc_tlb_invalidation_done_handler(guc, payload, - adj_len); + ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; default: xe_gt_warn(gt, "NOT_POSSIBLE"); diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 6f57578b07cb..0508f1064178 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -45,6 +45,11 @@ #define GUC_MAX_ENGINE_CLASSES 16 #define GUC_MAX_INSTANCES_PER_CLASS 32 +#define GUC_CONTEXT_NORMAL 0 +#define GUC_CONTEXT_COMPRESSION_SAVE 1 +#define GUC_CONTEXT_COMPRESSION_RESTORE 2 +#define GUC_CONTEXT_COUNT (GUC_CONTEXT_COMPRESSION_RESTORE + 1) + /* Helper for context registration H2G */ struct guc_ctxt_registration_info { u32 flags; @@ -103,10 +108,12 @@ struct guc_update_exec_queue_policy { #define GUC_WA_RENDER_RST_RC6_EXIT BIT(19) #define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) #define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22) +#define GUC_WA_SAVE_RESTORE_MCFG_REG_AT_MC6 BIT(25) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) #define GUC_CTL_ENABLE_LITE_RESTORE BIT(4) +#define GUC_CTL_ENABLE_PSMI_LOGGING BIT(7) #define GUC_CTL_DISABLE_SCHEDULER BIT(14) #define GUC_CTL_DEBUG 3 diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 68b192fe3b32..88557e86d637 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -722,7 +722,7 @@ static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (XE_GT_WA(pc_to_gt(pc), 22019338487)) { if (wait_for_flush_complete(pc) != 0) return -EAGAIN; } @@ -835,7 +835,7 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(gt, 22019338487)) { + if (XE_GT_WA(gt, 22019338487)) { if (xe_gt_is_media_type(gt)) return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); else @@ -899,7 +899,7 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); - if (XE_WA(tile->primary_gt, 14022085890)) + if (XE_GT_WA(tile->primary_gt, 14022085890)) ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); out: @@ -931,7 +931,7 @@ static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - return XE_WA(gt, 22019338487) && + return XE_GT_WA(gt, 22019338487) && pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; } @@ -1017,7 +1017,7 @@ static int pc_set_mert_freq_cap(struct xe_guc_pc 
*pc) { int ret; - if (!XE_WA(pc_to_gt(pc), 22019338487)) + if (!XE_GT_WA(pc_to_gt(pc), 22019338487)) return 0; guard(mutex)(&pc->freq_lock); @@ -1076,7 +1076,6 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; int ret = 0; if (xe->info.skip_guc_pc) @@ -1086,17 +1085,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (ret) return ret; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; - } - - xe_gt_idle_disable_c6(gt); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return 0; + return xe_gt_idle_disable_c6(gt); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index cafb47711e9b..1185b23b1384 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -542,7 +542,7 @@ static void __register_exec_queue(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void register_exec_queue(struct xe_exec_queue *q) +static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); @@ -550,6 +550,7 @@ static void register_exec_queue(struct xe_exec_queue *q) struct guc_ctxt_registration_info info; xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT); memset(&info, 0, sizeof(info)); info.context_idx = q->guc->id; @@ -559,6 +560,9 @@ static void register_exec_queue(struct xe_exec_queue *q) info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); info.flags = CONTEXT_REGISTRATION_FLAG_KMD; + if (ctx_type != GUC_CONTEXT_NORMAL) + info.flags |= BIT(ctx_type); + if (xe_exec_queue_is_parallel(q)) { u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); struct iosys_map map = xe_lrc_parallel_map(lrc); @@ -667,12 +671,18 @@ static void wq_item_append(struct xe_exec_queue *q) if (wq_wait_for_space(q, wqi_size)) return; + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN); wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | FIELD_PREP(WQ_LEN_MASK, len_dw); + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW); wqi[i++] = xe_lrc_descriptor(q->lrc[0]); + xe_gt_assert(guc_to_gt(guc), i == + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS); wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID); wqi[i++] = 0; + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL); for (j = 1; j < q->width; ++j) { struct xe_lrc *lrc = q->lrc[j]; @@ -693,6 +703,50 @@ static void wq_item_append(struct xe_exec_queue *q) parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); } +static int wq_items_rebase(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); + int i = q->guc->wqi_head; + + /* the ring starts after a header struct */ + iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[0])); + + while ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) { + u32 len_dw, type, val; + + if (drm_WARN_ON_ONCE(&xe->drm, i < 0 || i > 2 * WQ_SIZE)) + break; + + val = 
xe_map_rd_ring_u32(xe, &map, i / sizeof(u32) + + XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN, + WQ_SIZE / sizeof(u32)); + len_dw = FIELD_GET(WQ_LEN_MASK, val); + type = FIELD_GET(WQ_TYPE_MASK, val); + + if (drm_WARN_ON_ONCE(&xe->drm, len_dw >= WQ_SIZE / sizeof(u32))) + break; + + if (type == WQ_TYPE_MULTI_LRC) { + val = xe_lrc_descriptor(q->lrc[0]); + xe_map_wr_ring_u32(xe, &map, i / sizeof(u32) + + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW, + WQ_SIZE / sizeof(u32), val); + } else if (drm_WARN_ON_ONCE(&xe->drm, type != WQ_TYPE_NOOP)) { + break; + } + + i += (len_dw + 1) * sizeof(u32); + } + + if ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) { + xe_gt_err(q->gt, "Exec queue fixups incomplete - wqi parse failed\n"); + return -EBADMSG; + } + return 0; +} + #define RESUME_PENDING ~0x0ull static void submit_exec_queue(struct xe_exec_queue *q) { @@ -761,7 +815,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) - register_exec_queue(q); + register_exec_queue(q, GUC_CONTEXT_NORMAL); if (!lr) /* LR jobs are emitted in the exec IOCTL */ q->ring_ops->emit_job(job); submit_exec_queue(q); @@ -777,6 +831,30 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) return fence; } +/** + * xe_guc_jobs_ring_rebase - Re-emit ring commands of requests pending + * on all queues under a guc. + * @guc: the &xe_guc struct instance + */ +void xe_guc_jobs_ring_rebase(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + /* + * This routine is used within VF migration recovery. This means + * using the lock here introduces a restriction: we cannot wait + * for any GFX HW response while the lock is taken. + */ + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + if (exec_queue_killed_or_banned_or_wedged(q)) + continue; + xe_exec_queue_jobs_ring_restore(q); + } + mutex_unlock(&guc->submission_state.lock); +} + static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); @@ -1773,6 +1851,43 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) } } +/** + * xe_guc_submit_reset_block - Disallow reset calls on given GuC. + * @guc: the &xe_guc struct instance + */ +int xe_guc_submit_reset_block(struct xe_guc *guc) +{ + return atomic_fetch_or(1, &guc->submission_state.reset_blocked); +} + +/** + * xe_guc_submit_reset_unblock - Allow back reset calls on given GuC. + * @guc: the &xe_guc struct instance + */ +void xe_guc_submit_reset_unblock(struct xe_guc *guc) +{ + atomic_set_release(&guc->submission_state.reset_blocked, 0); + wake_up_all(&guc->ct.wq); +} + +static int guc_submit_reset_is_blocked(struct xe_guc *guc) +{ + return atomic_read_acquire(&guc->submission_state.reset_blocked); +} + +/* Maximum time of blocking reset */ +#define RESET_BLOCK_PERIOD_MAX (HZ * 5) + +/** + * xe_guc_wait_reset_unblock - Wait until reset blocking flag is lifted, or timeout. + * @guc: the &xe_guc struct instance + */ +int xe_guc_wait_reset_unblock(struct xe_guc *guc) +{ + return wait_event_timeout(guc->ct.wq, + !guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX); +} + int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; @@ -1826,6 +1941,19 @@ void xe_guc_submit_stop(struct xe_guc *guc) } +/** + * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled + */ +void xe_guc_submit_pause(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + xe_sched_submission_stop_async(&q->guc->sched); +} + static void guc_exec_queue_start(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; @@ -1866,6 +1994,28 @@ int xe_guc_submit_start(struct xe_guc *guc) return 0; } +static void guc_exec_queue_unpause(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + + xe_sched_submission_start(sched); +} + +/** + * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC. + * @guc: the &xe_guc struct instance whose scheduler is to be enabled + */ +void xe_guc_submit_unpause(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_unpause(q); + + wake_up_all(&guc->ct.wq); +} + static struct xe_exec_queue * g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { @@ -2378,6 +2528,32 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) } /** + * xe_guc_register_exec_queue - Register exec queue for a given context type. + * @q: Execution queue + * @ctx_type: Type of the context + * + * This function registers the execution queue with the guc. Special context + * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE + * are only applicable for IGPU and in the VF. + * Submits the execution queue to GUC after registering it. + * + * Returns - None. + */ +void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + + xe_assert(xe, IS_SRIOV_VF(xe)); + xe_assert(xe, !IS_DGFX(xe)); + xe_assert(xe, (ctx_type > GUC_CONTEXT_NORMAL && + ctx_type < GUC_CONTEXT_COUNT)); + + register_exec_queue(q, ctx_type); + enable_scheduling(q); +} + +/** * xe_guc_submit_print - GuC Submit Print. * @guc: GuC. * @p: drm_printer where it will be printed out. @@ -2397,3 +2573,32 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) guc_exec_queue_print(q, p); mutex_unlock(&guc->submission_state.lock); } + +/** + * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all + * exec queues registered to given GuC. + * @guc: the &xe_guc struct instance + * @scratch: scratch buffer to be used as temporary storage + * + * Returns: zero on success, negative error code on failure. 
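The pause, rebase and unpause helpers added in this hunk are building blocks for VF migration recovery. A rough usage sketch follows, with a hypothetical caller name and an assumed ordering; it presumes the xe_guc_submit.h declarations above, and the real recovery path may sequence these steps and handle errors differently:

static int example_vf_post_migration_fixups(struct xe_guc *guc, void *scratch)
{
	int err;

	xe_guc_submit_pause(guc);		/* stop pushing jobs to the GuC */

	err = xe_guc_contexts_hwsp_rebase(guc, scratch); /* fix GGTT refs in LRCs */
	if (!err)
		xe_guc_jobs_ring_rebase(guc);	/* re-emit pending ring commands */

	xe_guc_submit_unpause(guc);		/* let the schedulers run again */

	return err;
}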
+ */ +int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) +{ + struct xe_exec_queue *q; + unsigned long index; + int err = 0; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); + if (err) + break; + if (xe_exec_queue_is_parallel(q)) + err = wq_items_rebase(q); + if (err) + break; + } + mutex_unlock(&guc->submission_state.lock); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 9b71a986c6ca..6b5df5d0956b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -18,6 +18,11 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); +void xe_guc_submit_pause(struct xe_guc *guc); +void xe_guc_submit_unpause(struct xe_guc *guc); +int xe_guc_submit_reset_block(struct xe_guc *guc); +void xe_guc_submit_reset_unblock(struct xe_guc *guc); +int xe_guc_wait_reset_unblock(struct xe_guc *guc); void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_read_stopped(struct xe_guc *guc); @@ -29,6 +34,8 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len); +void xe_guc_jobs_ring_rebase(struct xe_guc *guc); + struct xe_guc_submit_exec_queue_snapshot * xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); void @@ -39,5 +46,8 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot); void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); +void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type); + +int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c new file mode 100644 index 000000000000..6bf2103602f8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "abi/guc_actions_abi.h" + +#include "xe_device.h" +#include "xe_gt_stats.h" +#include "xe_gt_types.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_tlb_inval.h" +#include "xe_force_wake.h" +#include "xe_mmio.h" +#include "xe_tlb_inval.h" + +#include "regs/xe_guc_regs.h" + +/* + * XXX: The seqno algorithm relies on TLB invalidation being processed in order + * which they currently are by the GuC, if that changes the algorithm will need + * to be updated. 
+ */ + +static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len) +{ + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, action[1]); /* Seqno */ + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); + return xe_guc_ct_send(&guc->ct, action, len, + G2H_LEN_DW_TLB_INVALIDATE, 1); +} + +#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ + XE_GUC_TLB_INVAL_FLUSH_CACHE) + +static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno) +{ + struct xe_guc *guc = tlb_inval->private; + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION_ALL, + seqno, + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + return send_tlb_inval(guc, action, ARRAY_SIZE(action)); +} + +static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno) +{ + struct xe_guc *guc = tlb_inval->private; + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = guc_to_xe(guc); + + /* + * Returning -ECANCELED in this function is squashed at the caller and + * signals waiters. + */ + + if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) { + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + seqno, + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), + }; + + return send_tlb_inval(guc, action, ARRAY_SIZE(action)); + } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { + struct xe_mmio *mmio = >->mmio; + unsigned int fw_ref; + + if (IS_SRIOV_VF(xe)) + return -ECANCELED; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, + PVC_GUC_TLB_INV_DESC0_VALID); + } else { + xe_mmio_write32(mmio, GUC_TLB_INV_CR, + GUC_TLB_INV_CR_INVALIDATE); + } + xe_force_wake_put(gt_to_fw(gt), fw_ref); + } + + return -ECANCELED; +} + +/* + * Ensure that roundup_pow_of_two(length) doesn't overflow. + * Note that roundup_pow_of_two() operates on unsigned long, + * not on u64. + */ +#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) + +static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, + u64 start, u64 end, u32 asid) +{ +#define MAX_TLB_INVALIDATION_LEN 7 + struct xe_guc *guc = tlb_inval->private; + struct xe_gt *gt = guc_to_gt(guc); + u32 action[MAX_TLB_INVALIDATION_LEN]; + u64 length = end - start; + int len = 0; + + if (guc_to_xe(guc)->info.force_execlist) + return -ECANCELED; + + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = seqno; + if (!gt_to_xe(gt)->info.has_range_tlb_inval || + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); + } else { + u64 orig_start = start; + u64 align; + + if (length < SZ_4K) + length = SZ_4K; + + /* + * We need to invalidate a higher granularity if start address + * is not aligned to length. When start is not aligned with + * length we need to find the length large enough to create an + * address mask covering the required range. 
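As a worked example of the masking below: a request for start = 0x3000, end = 0x6000 is a 12K range straddling the 16K boundary at 0x4000. The 12K length is first rounded up to the 16K power of two and start is aligned down to 0x0; since 0x0 + 16K still does not reach ALIGN(0x6000, 16K) = 0x8000, the loop doubles the length to 32K, so the single invalidation issued covers [0, 32K) and the range dword sent is ilog2(32K) - ilog2(4K) = 3. Ranges of 2M or more are additionally widened to at least 16M to satisfy the hardware minimum noted further down.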
+ */ + align = roundup_pow_of_two(length); + start = ALIGN_DOWN(start, align); + end = ALIGN(end, align); + length = align; + while (start + length < end) { + length <<= 1; + start = ALIGN_DOWN(orig_start, length); + } + + /* + * Minimum invalidation size for a 2MB page that the hardware + * expects is 16MB + */ + if (length >= SZ_2M) { + length = max_t(u64, SZ_16M, length); + start = ALIGN_DOWN(orig_start, length); + } + + xe_gt_assert(gt, length >= SZ_4K); + xe_gt_assert(gt, is_power_of_2(length)); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, IS_ALIGNED(start, length)); + + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); + action[len++] = asid; + action[len++] = lower_32_bits(start); + action[len++] = upper_32_bits(start); + action[len++] = ilog2(length) - ilog2(SZ_4K); + } + + xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); + + return send_tlb_inval(guc, action, len); +} + +static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + return xe_guc_ct_initialized(&guc->ct); +} + +static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + LNL_FLUSH_WORK(&guc->ct.g2h_worker); +} + +static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + /* this reflects what HW/GuC needs to process TLB inv request */ + const long hw_tlb_timeout = HZ / 4; + + /* this estimates actual delay caused by the CTB transport */ + long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct); + + return hw_tlb_timeout + 2 * delay; +} + +static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { + .all = send_tlb_inval_all, + .ggtt = send_tlb_inval_ggtt, + .ppgtt = send_tlb_inval_ppgtt, + .initialized = tlb_inval_initialized, + .flush = tlb_inval_flush, + .timeout_delay = tlb_inval_timeout_delay, +}; + +/** + * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early + * @guc: GuC object + * @tlb_inval: TLB invalidation client + * + * Inititialize GuC TLB invalidation by setting back pointer in TLB invalidation + * client to the GuC and setting GuC backend ops. + */ +void xe_guc_tlb_inval_init_early(struct xe_guc *guc, + struct xe_tlb_inval *tlb_inval) +{ + tlb_inval->private = guc; + tlb_inval->ops = &guc_tlb_inval_ops; +} + +/** + * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler + * @guc: guc + * @msg: message indicating TLB invalidation done + * @len: length of message + * + * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any + * invalidation fences for seqno. Algorithm for this depends on seqno being + * received in-order and asserts this assumption. + * + * Return: 0 on success, -EPROTO for malformed messages. 
+ */ +int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + + if (unlikely(len != 1)) + return -EPROTO; + + xe_tlb_inval_done_handler(>->tlb_inval, msg[0]); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.h b/drivers/gpu/drm/xe/xe_guc_tlb_inval.h new file mode 100644 index 000000000000..07d668b02e3d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUC_TLB_INVAL_H_ +#define _XE_GUC_TLB_INVAL_H_ + +#include <linux/types.h> + +struct xe_guc; +struct xe_tlb_inval; + +void xe_guc_tlb_inval_init_early(struct xe_guc *guc, + struct xe_tlb_inval *tlb_inval); + +int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 1fde7614fcc5..c7b9642b41ba 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -85,6 +85,12 @@ struct xe_guc { struct xarray exec_queue_lookup; /** @submission_state.stopped: submissions are stopped */ atomic_t stopped; + /** + * @submission_state.reset_blocked: reset attempts are blocked; + * blocking reset in order to delay it may be required if running + * an operation which is sensitive to resets. + */ + atomic_t reset_blocked; /** @submission_state.lock: protects submission state */ struct mutex lock; /** @submission_state.enabled: submission is enabled */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 6d7b62724126..a415ca488791 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -197,7 +197,7 @@ int xe_heci_gsc_init(struct xe_device *xe) if (ret) return ret; - if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) { + if (!def->use_polling && !xe_survivability_mode_is_boot_enabled(xe)) { ret = heci_gsc_irq_setup(xe); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 796ba8c34a16..1cf623b4a5bc 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -576,7 +576,7 @@ static void adjust_idledly(struct xe_hw_engine *hwe) u32 maxcnt_units_ns = 640; bool inhibit_switch = 0; - if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_WA(gt, 16023105232)) { + if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_GT_WA(gt, 16023105232)) { idledly = xe_mmio_read32(>->mmio, RING_IDLEDLY(hwe->mmio_base)); maxcnt = xe_mmio_read32(>->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base)); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index c926f840c87b..58bee3ffe881 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -103,8 +103,8 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) break; case XE_ENGINE_CLASS_OTHER: break; - default: - drm_warn(&xe->drm, "NOT POSSIBLE"); + case XE_ENGINE_CLASS_MAX: + xe_gt_assert(gt, false); } } diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c new file mode 100644 index 000000000000..8c65291f36fc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_error.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/fault-inject.h> + +#include "regs/xe_gsc_regs.h" +#include "regs/xe_hw_error_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_hw_error.h" +#include 
"xe_mmio.h" +#include "xe_survivability_mode.h" + +#define HEC_UNCORR_FW_ERR_BITS 4 +extern struct fault_attr inject_csc_hw_error; + +/* Error categories reported by hardware */ +enum hardware_error { + HARDWARE_ERROR_CORRECTABLE = 0, + HARDWARE_ERROR_NONFATAL = 1, + HARDWARE_ERROR_FATAL = 2, + HARDWARE_ERROR_MAX, +}; + +static const char * const hec_uncorrected_fw_errors[] = { + "Fatal", + "CSE Disabled", + "FD Corruption", + "Data Corruption" +}; + +static const char *hw_error_to_str(const enum hardware_error hw_err) +{ + switch (hw_err) { + case HARDWARE_ERROR_CORRECTABLE: + return "CORRECTABLE"; + case HARDWARE_ERROR_NONFATAL: + return "NONFATAL"; + case HARDWARE_ERROR_FATAL: + return "FATAL"; + default: + return "UNKNOWN"; + } +} + +static bool fault_inject_csc_hw_error(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_csc_hw_error, 1); +} + +static void csc_hw_error_work(struct work_struct *work) +{ + struct xe_tile *tile = container_of(work, typeof(*tile), csc_hw_error_work); + struct xe_device *xe = tile_to_xe(tile); + int ret; + + ret = xe_survivability_mode_runtime_enable(xe); + if (ret) + drm_err(&xe->drm, "Failed to enable runtime survivability mode\n"); +} + +static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err) +{ + const char *hw_err_str = hw_error_to_str(hw_err); + struct xe_device *xe = tile_to_xe(tile); + struct xe_mmio *mmio = &tile->mmio; + u32 base, err_bit, err_src; + unsigned long fw_err; + + if (xe->info.platform != XE_BATTLEMAGE) + return; + + base = BMG_GSC_HECI1_BASE; + lockdep_assert_held(&xe->irq.lock); + err_src = xe_mmio_read32(mmio, HEC_UNCORR_ERR_STATUS(base)); + if (!err_src) { + drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported HEC_ERR_STATUS_%s blank\n", + tile->id, hw_err_str); + return; + } + + if (err_src & UNCORR_FW_REPORTED_ERR) { + fw_err = xe_mmio_read32(mmio, HEC_UNCORR_FW_ERR_DW0(base)); + for_each_set_bit(err_bit, &fw_err, HEC_UNCORR_FW_ERR_BITS) { + drm_err_ratelimited(&xe->drm, HW_ERR + "%s: HEC Uncorrected FW %s error reported, bit[%d] is set\n", + hw_err_str, hec_uncorrected_fw_errors[err_bit], + err_bit); + + schedule_work(&tile->csc_hw_error_work); + } + } + + xe_mmio_write32(mmio, HEC_UNCORR_ERR_STATUS(base), err_src); +} + +static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_error hw_err) +{ + const char *hw_err_str = hw_error_to_str(hw_err); + struct xe_device *xe = tile_to_xe(tile); + unsigned long flags; + u32 err_src; + + if (xe->info.platform != XE_BATTLEMAGE) + return; + + spin_lock_irqsave(&xe->irq.lock, flags); + err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err)); + if (!err_src) { + drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported DEV_ERR_STAT_%s blank!\n", + tile->id, hw_err_str); + goto unlock; + } + + if (err_src & XE_CSC_ERROR) + csc_hw_error_handler(tile, hw_err); + + xe_mmio_write32(&tile->mmio, DEV_ERR_STAT_REG(hw_err), err_src); + +unlock: + spin_unlock_irqrestore(&xe->irq.lock, flags); +} + +/** + * xe_hw_error_irq_handler - irq handling for hw errors + * @tile: tile instance + * @master_ctl: value read from master interrupt register + * + * Xe platforms add three error bits to the master interrupt register to support error handling. + * These three bits are used to convey the class of error FATAL, NONFATAL, or CORRECTABLE. + * To process the interrupt, determine the source of error by reading the Device Error Source + * Register that corresponds to the class of error being serviced. 
+ */ +void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl) +{ + enum hardware_error hw_err; + + if (fault_inject_csc_hw_error()) + schedule_work(&tile->csc_hw_error_work); + + for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++) + if (master_ctl & ERROR_IRQ(hw_err)) + hw_error_source_handler(tile, hw_err); +} + +/* + * Process hardware errors during boot + */ +static void process_hw_errors(struct xe_device *xe) +{ + struct xe_tile *tile; + u32 master_ctl; + u8 id; + + for_each_tile(tile, xe, id) { + master_ctl = xe_mmio_read32(&tile->mmio, GFX_MSTR_IRQ); + xe_hw_error_irq_handler(tile, master_ctl); + xe_mmio_write32(&tile->mmio, GFX_MSTR_IRQ, master_ctl); + } +} + +/** + * xe_hw_error_init - Initialize hw errors + * @xe: xe device instance + * + * Initialize and check for errors that occurred during boot + * prior to driver load + */ +void xe_hw_error_init(struct xe_device *xe) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) + return; + + INIT_WORK(&tile->csc_hw_error_work, csc_hw_error_work); + + process_hw_errors(xe); +} diff --git a/drivers/gpu/drm/xe/xe_hw_error.h b/drivers/gpu/drm/xe/xe_hw_error.h new file mode 100644 index 000000000000..d86e28c5180c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_error.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ +#ifndef XE_HW_ERROR_H_ +#define XE_HW_ERROR_H_ + +#include <linux/types.h> + +struct xe_tile; +struct xe_device; + +void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl); +void xe_hw_error_init(struct xe_device *xe); +#endif diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index c17ed1ae8649..32a76ae6e9dc 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -179,7 +179,7 @@ static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr u32 clr, u32 set) { struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); - u32 val0, val1; + u32 val0 = 0, val1 = 0; int ret = 0; ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, @@ -734,7 +734,7 @@ static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, long *value, u32 scale_factor) { int ret; - u32 uval; + u32 uval = 0; mutex_lock(&hwmon->hwmon_lock); @@ -918,7 +918,7 @@ xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) static umode_t xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; /* hwmon sysfs attribute of current available only for package */ if (channel != CHANNEL_PKG) @@ -1020,7 +1020,7 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; if (!hwmon->xe->info.has_fan_control) return 0; diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index bc7dc2099470..044dda517b7c 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -147,6 +147,20 @@ static void xe_i2c_unregister_adapter(struct xe_i2c *i2c) } /** + * xe_i2c_present - I2C controller is present and functional + * @xe: xe device instance + * + * Check whether the I2C controller is present and functioning with valid + * endpoint cookie. + * + * Return: %true if present, %false otherwise. 
+ */ +bool xe_i2c_present(struct xe_device *xe) +{ + return xe->i2c && xe->i2c->ep.cookie == XE_I2C_EP_COOKIE_DEVICE; +} + +/** * xe_i2c_irq_handler: Handler for I2C interrupts * @xe: xe device instance * @master_ctl: interrupt register @@ -230,7 +244,7 @@ void xe_i2c_pm_suspend(struct xe_device *xe) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); - if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + if (!xe_i2c_present(xe)) return; xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot); @@ -241,7 +255,7 @@ void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); - if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + if (!xe_i2c_present(xe)) return; if (d3cold) diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h index b767ed8ce52b..ecd5f10358e2 100644 --- a/drivers/gpu/drm/xe/xe_i2c.h +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -49,11 +49,13 @@ struct xe_i2c { #if IS_ENABLED(CONFIG_I2C) int xe_i2c_probe(struct xe_device *xe); +bool xe_i2c_present(struct xe_device *xe); void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); void xe_i2c_pm_suspend(struct xe_device *xe); void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); #else static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } +static inline bool xe_i2c_present(struct xe_device *xe) { return false; } static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5df5b8c2a3e4..870edaf69388 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,6 +18,7 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_hw_error.h" #include "xe_i2c.h" #include "xe_memirq.h" #include "xe_mmio.h" @@ -468,6 +469,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl); gt_irq_handler(tile, master_ctl, intr_dw, identity); + xe_hw_error_irq_handler(tile, master_ctl); /* * Display interrupts (including display backlight operations @@ -756,6 +758,8 @@ int xe_irq_install(struct xe_device *xe) int nvec = 1; int err; + xe_hw_error_init(xe); + xe_irq_reset(xe); if (xe_device_has_msix(xe)) { diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index a2000307d5bf..f2bfbfa3efa1 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -11,7 +11,7 @@ #include "xe_assert.h" #include "xe_bo.h" -#include "xe_gt_tlb_invalidation.h" +#include "xe_tlb_inval.h" #include "xe_lmtt.h" #include "xe_map.h" #include "xe_mmio.h" @@ -195,14 +195,17 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt) struct xe_tile *tile = lmtt_to_tile(lmtt); struct xe_device *xe = tile_to_xe(tile); dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE); + struct xe_gt *gt; + u8 id; lmtt_debug(lmtt, "DIR offset %pad\n", &offset); lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo)); lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K)); - xe_mmio_write32(&tile->mmio, - GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG, - LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); + for_each_gt_on_tile(gt, tile, id) + xe_mmio_write32(>->mmio, + GRAPHICS_VER(xe) >= 20 ? 
XE2_LMEM_CFG : LMEM_CFG, + LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); } /** @@ -225,8 +228,8 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt) static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) { - struct xe_gt_tlb_invalidation_fence fences[XE_MAX_GT_PER_TILE]; - struct xe_gt_tlb_invalidation_fence *fence = fences; + struct xe_tlb_inval_fence fences[XE_MAX_GT_PER_TILE]; + struct xe_tlb_inval_fence *fence = fences; struct xe_tile *tile = lmtt_to_tile(lmtt); struct xe_gt *gt; int result = 0; @@ -234,8 +237,8 @@ static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) u8 id; for_each_gt_on_tile(gt, tile, id) { - xe_gt_tlb_invalidation_fence_init(gt, fence, true); - err = xe_gt_tlb_invalidation_all(gt, fence); + xe_tlb_inval_fence_init(>->tlb_inval, fence, true); + err = xe_tlb_inval_all(>->tlb_inval, fence); result = result ?: err; fence++; } @@ -249,7 +252,7 @@ static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) */ fence = fences; for_each_gt_on_tile(gt, tile, id) - xe_gt_tlb_invalidation_fence_wait(fence++); + xe_tlb_inval_fence_wait(fence++); return result; } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 6d38411bdeba..8f6c3ba47882 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -41,7 +41,6 @@ #define LRC_PPHWSP_SIZE SZ_4K #define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K -#define LRC_WA_BB_SIZE SZ_4K /* * Layout of the LRC and associated data allocated as @@ -76,6 +75,11 @@ lrc_to_xe(struct xe_lrc *lrc) static bool gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) { + if (XE_GT_WA(gt, 16010904313) && + (class == XE_ENGINE_CLASS_RENDER || + class == XE_ENGINE_CLASS_COMPUTE)) + return true; + return false; } @@ -692,7 +696,13 @@ u32 xe_lrc_regs_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; } -static size_t lrc_reg_size(struct xe_device *xe) +/** + * xe_lrc_reg_size() - Get size of the LRC registers area within queues + * @xe: the &xe_device struct instance + * + * Returns: Size of the LRC registers area for current platform + */ +size_t xe_lrc_reg_size(struct xe_device *xe) { if (GRAPHICS_VERx100(xe) >= 1250) return 96 * sizeof(u32); @@ -702,7 +712,7 @@ static size_t lrc_reg_size(struct xe_device *xe) size_t xe_lrc_skip_size(struct xe_device *xe) { - return LRC_PPHWSP_SIZE + lrc_reg_size(xe); + return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); } static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) @@ -943,6 +953,47 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) return data; } +/** + * xe_default_lrc_update_memirq_regs_with_address - Re-compute GGTT references in default LRC + * of given engine. + * @hwe: the &xe_hw_engine struct instance + */ +void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + u32 *regs; + + if (!gt->default_lrc[hwe->class]) + return; + + regs = gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE; + set_memory_based_intr(regs, hwe); +} + +/** + * xe_lrc_update_memirq_regs_with_address - Re-compute GGTT references in mem interrupt data + * for given LRC. 
+ * @lrc: the &xe_lrc struct instance + * @hwe: the &xe_hw_engine struct instance + * @regs: scratch buffer to be used as temporary storage + */ +void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *regs) +{ + struct xe_gt *gt = hwe->gt; + struct iosys_map map; + size_t regs_len; + + if (!xe_device_uses_memirq(gt_to_xe(gt))) + return; + + map = __xe_lrc_regs_map(lrc); + regs_len = xe_lrc_reg_size(gt_to_xe(gt)); + xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len); + set_memory_based_intr(regs, hwe); + xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len); +} + static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); @@ -1014,6 +1065,63 @@ static ssize_t setup_utilization_wa(struct xe_lrc *lrc, return cmd - batch; } +static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); + u32 *cmd = batch; + + if (!XE_GT_WA(lrc->gt, 16010904313) || + !(hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE || + hwe->class == XE_ENGINE_CLASS_COPY || + hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE || + hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE)) + return 0; + + if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) + return -ENOSPC; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO | + MI_LRM_ASYNC; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO | + MI_LRM_ASYNC; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + return cmd - batch; +} + +static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + u32 *cmd = batch; + + if (!XE_GT_WA(lrc->gt, 18022495364) || + hwe->class != XE_ENGINE_CLASS_RENDER) + return 0; + + if (xe_gt_WARN_ON(lrc->gt, max_len < 3)) + return -ENOSPC; + + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cmd++ = CS_DEBUG_MODE1(0).addr; + *cmd++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); + + return cmd - batch; +} + struct bo_setup { ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *batch, size_t max_size); @@ -1040,13 +1148,11 @@ static int setup_bo(struct bo_setup_state *state) ssize_t remain; if (state->lrc->bo->vmap.is_iomem) { - state->buffer = kmalloc(state->max_size, GFP_KERNEL); if (!state->buffer) return -ENOMEM; state->ptr = state->buffer; } else { state->ptr = state->lrc->bo->vmap.vaddr + state->offset; - state->buffer = NULL; } remain = state->max_size / sizeof(u32); @@ -1071,7 +1177,6 @@ static int setup_bo(struct bo_setup_state *state) return 0; fail: - kfree(state->buffer); return -ENOSPC; } @@ -1083,18 +1188,27 @@ static void finish_bo(struct bo_setup_state *state) xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, state->offset, state->buffer, state->written * sizeof(u32)); - kfree(state->buffer); } -static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +/** + * xe_lrc_setup_wa_bb_with_scratch - Execute all wa bb setup callbacks. 
+ * @lrc: the &xe_lrc struct instance + * @hwe: the &xe_hw_engine struct instance + * @scratch: preallocated scratch buffer for temporary storage + * Return: 0 on success, negative error code on failure + */ +int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch) { static const struct bo_setup funcs[] = { + { .setup = setup_timestamp_wa }, + { .setup = setup_invalidate_state_cache_wa }, { .setup = setup_utilization_wa }, }; struct bo_setup_state state = { .lrc = lrc, .hwe = hwe, .max_size = LRC_WA_BB_SIZE, + .buffer = scratch, .reserve_dw = 1, .offset = __xe_lrc_wa_bb_offset(lrc), .funcs = funcs, @@ -1117,15 +1231,32 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) return 0; } +static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + u32 *buf = NULL; + int ret; + + if (lrc->bo->vmap.is_iomem) + buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL); + + ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf); + + kfree(buf); + + return ret; +} + static int setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) { static struct bo_setup rcs_funcs[] = { + { .setup = setup_timestamp_wa }, }; struct bo_setup_state state = { .lrc = lrc, .hwe = hwe, .max_size = (63 * 64) /* max 63 cachelines */, + .buffer = NULL, .offset = __xe_lrc_indirect_ctx_offset(lrc), }; int ret; @@ -1142,9 +1273,14 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) return 0; + if (lrc->bo->vmap.is_iomem) + state.buffer = kmalloc(state.max_size, GFP_KERNEL); + ret = setup_bo(&state); - if (ret) + if (ret) { + kfree(state.buffer); return ret; + } /* * Align to 64B cacheline so there's no garbage at the end for CS to @@ -1156,6 +1292,7 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) } finish_bo(&state); + kfree(state.buffer); xe_lrc_write_ctx_reg(lrc, CTX_CS_INDIRECT_CTX, @@ -1374,6 +1511,23 @@ void xe_lrc_destroy(struct kref *ref) kfree(lrc); } +/** + * xe_lrc_update_hwctx_regs_with_address - Re-compute GGTT references within given LRC. + * @lrc: the &xe_lrc struct instance + */ +void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) { + xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, + __xe_lrc_indirect_ring_ggtt_addr(lrc)); + + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, + __xe_lrc_ring_ggtt_addr(lrc)); + } else { + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + } +} + void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) { if (xe_lrc_has_indirect_ring_state(lrc)) @@ -1939,7 +2093,7 @@ u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) * continue to emit all of the SVG state since it's best not to leak * any of the state between contexts, even if that leakage is harmless. 
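The helpers exported above (xe_lrc_setup_wa_bb_with_scratch(), together with xe_lrc_update_hwctx_regs_with_address() and xe_lrc_update_memirq_regs_with_address() earlier in this file) let a caller that already owns a scratch buffer re-derive every GGTT-based reference of an LRC. Their call sites are not part of this hunk; the sketch below only shows the intended pairing, with lrc, hwe and xe assumed to be in scope:

	u32 *scratch;
	int err;

	/* one buffer serves both users: LRC_WA_BB_SIZE bytes for the WA BB,
	 * xe_lrc_reg_size() bytes for the register snapshot
	 */
	scratch = kmalloc(max_t(size_t, LRC_WA_BB_SIZE, xe_lrc_reg_size(xe)),
			  GFP_KERNEL);
	if (!scratch)
		return -ENOMEM;

	xe_lrc_update_hwctx_regs_with_address(lrc);	/* ring / indirect ring state */
	xe_lrc_update_memirq_regs_with_address(lrc, hwe, scratch);
	err = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, scratch);

	kfree(scratch);
	return err;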
*/ - if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_GT_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); } diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index b6c8053c581b..188565465779 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -42,6 +42,8 @@ struct xe_lrc_snapshot { #define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4) #define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4) +#define LRC_WA_BB_SIZE SZ_4K + #define XE_LRC_CREATE_RUNALONE 0x1 #define XE_LRC_CREATE_PXP 0x2 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, @@ -88,6 +90,10 @@ bool xe_lrc_ring_is_idle(struct xe_lrc *lrc); u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); +void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc); +void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe); +void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *regs); u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr); void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val); @@ -106,6 +112,7 @@ s32 xe_lrc_start_seqno(struct xe_lrc *lrc); u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); +size_t xe_lrc_reg_size(struct xe_device *xe); size_t xe_lrc_skip_size(struct xe_device *xe); void xe_lrc_dump_default(struct drm_printer *p, @@ -124,6 +131,8 @@ u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc); u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); +int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *scratch); /** * xe_lrc_update_timestamp - readout LRC timestamp and update cached value diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7d20ac4bb633..9643442ef101 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -9,6 +9,7 @@ #include <linux/sizes.h> #include <drm/drm_managed.h> +#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_tt.h> #include <uapi/drm/xe_drm.h> @@ -30,10 +31,12 @@ #include "xe_mocs.h" #include "xe_pt.h" #include "xe_res_cursor.h" +#include "xe_sa.h" #include "xe_sched_job.h" #include "xe_sync.h" #include "xe_trace_bo.h" #include "xe_vm.h" +#include "xe_vram.h" /** * struct xe_migrate - migrate context. @@ -84,19 +87,6 @@ struct xe_migrate { */ #define MAX_PTE_PER_SDI 0x1FEU -/** - * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. - * @tile: The tile. - * - * Returns the default migrate exec queue of this tile. 
- * - * Return: The default migrate exec queue - */ -struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile) -{ - return tile->migrate->q; -} - static void xe_migrate_fini(void *arg) { struct xe_migrate *m = arg; @@ -130,38 +120,39 @@ static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte) u64 identity_offset = IDENTITY_OFFSET; if (GRAPHICS_VER(xe) >= 20 && is_comp_pte) - identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); + identity_offset += DIV_ROUND_UP_ULL(xe_vram_region_actual_physical_size + (xe->mem.vram), SZ_1G); - addr -= xe->mem.vram.dpa_base; + addr -= xe_vram_region_dpa_base(xe->mem.vram); return addr + (identity_offset << xe_pt_shift(2)); } static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo, u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs) { + struct xe_vram_region *vram = xe->mem.vram; + resource_size_t dpa_base = xe_vram_region_dpa_base(vram); u64 pos, ofs, flags; u64 entry; /* XXX: Unclear if this should be usable_size? */ - u64 vram_limit = xe->mem.vram.actual_physical_size + - xe->mem.vram.dpa_base; + u64 vram_limit = xe_vram_region_actual_physical_size(vram) + dpa_base; u32 level = 2; ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8; flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, true, 0); - xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M)); + xe_assert(xe, IS_ALIGNED(xe_vram_region_usable_size(vram), SZ_2M)); /* * Use 1GB pages when possible, last chunk always use 2M * pages as mixing reserved memory (stolen, WOCPM) with a single * mapping is not allowed on certain platforms. */ - for (pos = xe->mem.vram.dpa_base; pos < vram_limit; + for (pos = dpa_base; pos < vram_limit; pos += SZ_1G, ofs += 8) { if (pos + SZ_1G >= vram_limit) { - entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs, - pat_index); + entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs); xe_map_wr(xe, &bo->vmap, ofs, u64, entry); flags = vm->pt_ops->pte_encode_addr(xe, 0, @@ -215,7 +206,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* PT30 & PT31 reserved for 2M identity map */ pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; - entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); + entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE; @@ -283,15 +274,14 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, flags = XE_PDE_64K; entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) * - XE_PAGE_SIZE, pat_index); + XE_PAGE_SIZE); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); } /* Write PDE's that point to our BO. 
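As a concrete example of the address math in xe_migrate_vram_ofs() above: with the 256 GiB identity offset and level-2 (1 GiB) entries, a VRAM page located 16 MiB above dpa_base is reachable in the migrate VM at GPU address 256 GiB + 16 MiB; on an Xe2+ part with 16 GiB of VRAM, the compressed (flat-CCS) view of the same page sits a further 16 GiB higher, at 272 GiB + 16 MiB. The numbers are illustrative only.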
*/ - for (i = 0; i < map_ofs / PAGE_SIZE; i++) { - entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE, - pat_index); + for (i = 0; i < map_ofs / XE_PAGE_SIZE; i++) { + entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + (i + 1) * 8, u64, entry); @@ -307,11 +297,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; + resource_size_t actual_phy_size = xe_vram_region_actual_physical_size(xe->mem.vram); xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, pat_index, pt30_ofs); - xe_assert(xe, xe->mem.vram.actual_physical_size <= - (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); + xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); /* * Identity map the entire vram for compressed pat_index for xe2+ @@ -320,11 +310,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) { u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; u64 vram_offset = IDENTITY_OFFSET + - DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); + DIV_ROUND_UP_ULL(actual_phy_size, SZ_1G); u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; - xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - - IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); + xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - + IDENTITY_OFFSET / 2) * SZ_1G); xe_migrate_program_identity(xe, vm, bo, map_ofs, vram_offset, comp_pat_index, pt31_ofs); } @@ -387,38 +377,47 @@ static bool xe_migrate_needs_ccs_emit(struct xe_device *xe) } /** + * xe_migrate_alloc - Allocate a migrate struct for a given &xe_tile + * @tile: &xe_tile + * + * Allocates a &xe_migrate for a given tile. + * + * Return: &xe_migrate on success, or NULL when out of memory. + */ +struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile) +{ + struct xe_migrate *m = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*m), GFP_KERNEL); + + if (m) + m->tile = tile; + return m; +} + +/** * xe_migrate_init() - Initialize a migrate context - * @tile: Back-pointer to the tile we're initializing for. + * @m: The migration context * - * Return: Pointer to a migrate context on success. Error pointer on error. + * Return: 0 if successful, negative error code on failure */ -struct xe_migrate *xe_migrate_init(struct xe_tile *tile) +int xe_migrate_init(struct xe_migrate *m) { - struct xe_device *xe = tile_to_xe(tile); + struct xe_tile *tile = m->tile; struct xe_gt *primary_gt = tile->primary_gt; - struct xe_migrate *m; + struct xe_device *xe = tile_to_xe(tile); struct xe_vm *vm; int err; - m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); - - m->tile = tile; - /* Special layout, prepared below.. 
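With the constructor now split into xe_migrate_alloc() and xe_migrate_init(), callers are expected to allocate first and initialize later. A minimal sketch of that pairing (the real call site lives in the tile init path outside this hunk, and where exactly tile->migrate is assigned is an assumption here):

	struct xe_migrate *m;
	int err;

	m = xe_migrate_alloc(tile);	/* drmm-managed, NULL on allocation failure */
	if (!m)
		return -ENOMEM;
	tile->migrate = m;

	err = xe_migrate_init(m);	/* builds the migrate VM, queue and identity map */
	if (err)
		return err;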
*/ vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | - XE_VM_FLAG_SET_TILE_ID(tile)); + XE_VM_FLAG_SET_TILE_ID(tile), NULL); if (IS_ERR(vm)) - return ERR_CAST(vm); + return PTR_ERR(vm); xe_vm_lock(vm, false); err = xe_migrate_prepare_vm(tile, m, vm); xe_vm_unlock(vm); - if (err) { - xe_vm_close_and_put(vm); - return ERR_PTR(err); - } + if (err) + goto err_out; if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, @@ -427,8 +426,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) false); u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt); - if (!hwe || !logical_mask) - return ERR_PTR(-EINVAL); + if (!hwe || !logical_mask) { + err = -EINVAL; + goto err_out; + } /* * XXX: Currently only reserving 1 (likely slow) BCS instance on @@ -437,16 +438,18 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | - EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0); + EXEC_QUEUE_FLAG_HIGH_PRIORITY | + EXEC_QUEUE_FLAG_MIGRATE, 0); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT, 0); + EXEC_QUEUE_FLAG_PERMANENT | + EXEC_QUEUE_FLAG_MIGRATE, 0); } if (IS_ERR(m->q)) { - xe_vm_close_and_put(vm); - return ERR_CAST(m->q); + err = PTR_ERR(m->q); + goto err_out; } mutex_init(&m->job_mutex); @@ -456,7 +459,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m); if (err) - return ERR_PTR(err); + return err; if (IS_DGFX(xe)) { if (xe_migrate_needs_ccs_emit(xe)) @@ -471,7 +474,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) (unsigned long long)m->min_chunk_size); } - return m; + return err; + +err_out: + xe_vm_close_and_put(vm); + return err; + } static u64 max_mem_transfer_per_pass(struct xe_device *xe) @@ -896,7 +904,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, goto err; } - xe_sched_job_add_migrate_flush(job, flush_flags); + xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB); if (!fence) { err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); @@ -940,6 +948,167 @@ err_sync: return fence; } +/** + * xe_migrate_lrc() - Get the LRC from migrate context. + * @migrate: Migrate context. + * + * Return: Pointer to LRC on success, error on failure + */ +struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate) +{ + return migrate->q->lrc[0]; +} + +static int emit_flush_invalidate(struct xe_exec_queue *q, u32 *dw, int i, + u32 flags) +{ + struct xe_lrc *lrc = xe_exec_queue_lrc(q); + dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_IMM_DW | flags; + dw[i++] = lower_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc)) | + MI_FLUSH_DW_USE_GTT; + dw[i++] = upper_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc)); + dw[i++] = MI_NOOP; + dw[i++] = MI_NOOP; + + return i; +} + +/** + * xe_migrate_ccs_rw_copy() - Copy content of TTM resources. + * @tile: Tile whose migration context to be used. + * @q : Execution to be used along with migration context. + * @src_bo: The buffer object @src is currently bound to. + * @read_write : Creates BB commands for CCS read/write. + * + * Creates batch buffer instructions to copy CCS metadata from CCS pool to + * memory and vice versa. + * + * This function should only be called for IGPU. + * + * Return: 0 if successful, negative error code on failure. 
+ */ +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, + struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write) + +{ + bool src_is_pltt = read_write == XE_SRIOV_VF_CCS_READ_CTX; + bool dst_is_pltt = read_write == XE_SRIOV_VF_CCS_WRITE_CTX; + struct ttm_resource *src = src_bo->ttm.resource; + struct xe_migrate *m = tile->migrate; + struct xe_gt *gt = tile->primary_gt; + u32 batch_size, batch_size_allocated; + struct xe_device *xe = gt_to_xe(gt); + struct xe_res_cursor src_it, ccs_it; + u64 size = xe_bo_size(src_bo); + struct xe_bb *bb = NULL; + u64 src_L0, src_L0_ofs; + u32 src_L0_pt; + int err; + + xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); + + xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo), + PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), + &ccs_it); + + /* Calculate Batch buffer size */ + batch_size = 0; + while (size) { + batch_size += 10; /* Flush + ggtt addr + 2 NOP */ + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = min_t(u64, max_mem_transfer_per_pass(xe), size); + + batch_size += pte_update_size(m, false, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, + &ccs_pt, 0, avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); + + /* Add copy commands size here */ + batch_size += EMIT_COPY_CCS_DW; + + size -= src_L0; + } + + bb = xe_bb_ccs_new(gt, batch_size, read_write); + if (IS_ERR(bb)) { + drm_err(&xe->drm, "BB allocation failed.\n"); + err = PTR_ERR(bb); + goto err_ret; + } + + batch_size_allocated = batch_size; + size = xe_bo_size(src_bo); + batch_size = 0; + + /* + * Emit PTE and copy commands here. + * The CCS copy command can only support limited size. If the size to be + * copied is more than the limit, divide copy into chunks. So, calculate + * sizes here again before copy command is emitted. + */ + while (size) { + batch_size += 10; /* Flush + ggtt addr + 2 NOP */ + u32 flush_flags = 0; + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = xe_migrate_res_sizes(m, &src_it); + + batch_size += pte_update_size(m, false, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, + &ccs_pt, 0, avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); + batch_size += EMIT_COPY_CCS_DW; + + emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); + + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); + + bb->len = emit_flush_invalidate(q, bb->cs, bb->len, flush_flags); + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, + src_L0_ofs, dst_is_pltt, + src_L0, ccs_ofs, true); + bb->len = emit_flush_invalidate(q, bb->cs, bb->len, flush_flags); + + size -= src_L0; + } + + xe_assert(xe, (batch_size_allocated == bb->len)); + src_bo->bb_ccs[read_write] = bb; + + return 0; + +err_ret: + return err; +} + +/** + * xe_get_migrate_exec_queue() - Get the execution queue from migrate context. + * @migrate: Migrate context. 
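xe_migrate_ccs_rw_copy() above builds its batch in two passes: the first walk only sizes the buffer, the second emits into it, and the closing xe_assert() checks that both walks agreed. Stripped of the PTE and CCS details, the shape of that pattern is roughly the following; every name in it is illustrative rather than driver API:

	size_t need = 0, emitted = 0, left, chunk;
	u32 *bb;

	for (left = total; left; left -= chunk) {	/* pass 1: measure */
		chunk = min(left, max_chunk);
		need += dwords_for_chunk(chunk);
	}

	bb = alloc_batch(need);				/* single allocation */
	if (!bb)
		return -ENOMEM;

	for (left = total; left; left -= chunk) {	/* pass 2: emit */
		chunk = min(left, max_chunk);
		emitted += emit_chunk(bb + emitted, chunk);
	}

	WARN_ON(emitted != need);			/* the two passes must agree */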
+ * + * Return: Pointer to execution queue on success, error on failure + */ +struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate) +{ + return migrate->q; +} + static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch) { @@ -1119,11 +1288,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, size -= clear_L0; /* Preemption is enabled again by the ring ops. */ - if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) + if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) { xe_res_next(&src_it, clear_L0); - else - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, - &src_it, clear_L0, dst); + } else { + emit_pte(m, bb, clear_L0_pt, clear_vram, + clear_only_system_ccs, &src_it, clear_L0, dst); + flush_flags |= MI_INVALIDATE_TLB; + } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -1134,7 +1305,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (xe_migrate_needs_ccs_emit(xe)) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_mem_ofs, false, clear_L0); - flush_flags = MI_FLUSH_DW_CCS; + flush_flags |= MI_FLUSH_DW_CCS; } job = xe_bb_create_migration_job(m->q, bb, @@ -1469,6 +1640,8 @@ next_cmd: goto err_sa; } + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); + if (ops->pre_commit) { pt_update->job = job; err = ops->pre_commit(pt_update); @@ -1571,7 +1744,8 @@ static u32 pte_update_cmd_size(u64 size) static void build_pt_update_batch_sram(struct xe_migrate *m, struct xe_bb *bb, u32 pt_offset, - dma_addr_t *sram_addr, u32 size) + struct drm_pagemap_addr *sram_addr, + u32 size) { u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; u32 ptes; @@ -1589,14 +1763,18 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes -= chunk; while (chunk--) { - u64 addr = sram_addr[i++] & PAGE_MASK; + u64 addr = sram_addr[i].addr & PAGE_MASK; + xe_tile_assert(m->tile, sram_addr[i].proto == + DRM_INTERCONNECT_SYSTEM); xe_tile_assert(m->tile, addr); addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, addr, pat_index, 0, false, 0); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); + + i++; } } } @@ -1612,7 +1790,8 @@ enum xe_migrate_copy_dir { static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned long len, unsigned long sram_offset, - dma_addr_t *sram_addr, u64 vram_addr, + struct drm_pagemap_addr *sram_addr, + u64 vram_addr, const enum xe_migrate_copy_dir dir) { struct xe_gt *gt = m->tile->primary_gt; @@ -1628,6 +1807,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ? PAGE_SIZE : 4; int err; + unsigned long i, j; if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || (sram_offset | vram_addr) & XE_CACHELINE_MASK)) @@ -1644,6 +1824,24 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, return ERR_PTR(err); } + /* + * If the order of a struct drm_pagemap_addr entry is greater than 0, + * the entry is populated by GPU pagemap but subsequent entries within + * the range of that order are not populated. + * build_pt_update_batch_sram() expects a fully populated array of + * struct drm_pagemap_addr. Ensure this is the case even with higher + * orders. 
+ */ + for (i = 0; i < npages;) { + unsigned int order = sram_addr[i].order; + + for (j = 1; j < NR_PAGES(order) && i + j < npages; j++) + if (!sram_addr[i + j].addr) + sram_addr[i + j].addr = sram_addr[i].addr + j * PAGE_SIZE; + + i += NR_PAGES(order); + } + build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, sram_addr, len + sram_offset); @@ -1669,7 +1867,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, goto err; } - xe_sched_job_add_migrate_flush(job, 0); + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); mutex_lock(&m->job_mutex); xe_sched_job_arm(job); @@ -1694,7 +1892,7 @@ err: * xe_migrate_to_vram() - Migrate to VRAM * @m: The migration context. * @npages: Number of pages to migrate. - * @src_addr: Array of dma addresses (source of migrate) + * @src_addr: Array of DMA information (source of migrate) * @dst_addr: Device physical address of VRAM (destination of migrate) * * Copy from an array dma addresses to a VRAM device physical address @@ -1704,7 +1902,7 @@ err: */ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, - dma_addr_t *src_addr, + struct drm_pagemap_addr *src_addr, u64 dst_addr) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr, @@ -1716,7 +1914,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, * @m: The migration context. * @npages: Number of pages to migrate. * @src_addr: Device physical address of VRAM (source of migrate) - * @dst_addr: Array of dma addresses (destination of migrate) + * @dst_addr: Array of DMA information (destination of migrate) * * Copy from a VRAM device physical address to an array dma addresses * @@ -1726,61 +1924,65 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - dma_addr_t *dst_addr) + struct drm_pagemap_addr *dst_addr) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr, XE_MIGRATE_COPY_TO_SRAM); } -static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr, +static void xe_migrate_dma_unmap(struct xe_device *xe, + struct drm_pagemap_addr *pagemap_addr, int len, int write) { unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); for (i = 0; i < npages; ++i) { - if (!dma_addr[i]) + if (!pagemap_addr[i].addr) break; - dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE, + dma_unmap_page(xe->drm.dev, pagemap_addr[i].addr, PAGE_SIZE, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); } - kfree(dma_addr); + kfree(pagemap_addr); } -static dma_addr_t *xe_migrate_dma_map(struct xe_device *xe, - void *buf, int len, int write) +static struct drm_pagemap_addr *xe_migrate_dma_map(struct xe_device *xe, + void *buf, int len, + int write) { - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); - dma_addr = kcalloc(npages, sizeof(*dma_addr), GFP_KERNEL); - if (!dma_addr) + pagemap_addr = kcalloc(npages, sizeof(*pagemap_addr), GFP_KERNEL); + if (!pagemap_addr) return ERR_PTR(-ENOMEM); for (i = 0; i < npages; ++i) { dma_addr_t addr; struct page *page; + enum dma_data_direction dir = write ? DMA_TO_DEVICE : + DMA_FROM_DEVICE; if (is_vmalloc_addr(buf)) page = vmalloc_to_page(buf); else page = virt_to_page(buf); - addr = dma_map_page(xe->drm.dev, - page, 0, PAGE_SIZE, - write ? 
DMA_TO_DEVICE : - DMA_FROM_DEVICE); + addr = dma_map_page(xe->drm.dev, page, 0, PAGE_SIZE, dir); if (dma_mapping_error(xe->drm.dev, addr)) goto err_fault; - dma_addr[i] = addr; + pagemap_addr[i] = + drm_pagemap_addr_encode(addr, + DRM_INTERCONNECT_SYSTEM, + 0, dir); buf += PAGE_SIZE; } - return dma_addr; + return pagemap_addr; err_fault: - xe_migrate_dma_unmap(xe, dma_addr, len, write); + xe_migrate_dma_unmap(xe, pagemap_addr, len, write); return ERR_PTR(-EFAULT); } @@ -1809,7 +2011,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, struct xe_device *xe = tile_to_xe(tile); struct xe_res_cursor cursor; struct dma_fence *fence = NULL; - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; unsigned long page_offset = (unsigned long)buf & ~PAGE_MASK; int bytes_left = len, current_page = 0; void *orig_buf = buf; @@ -1869,9 +2071,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, return err; } - dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); - if (IS_ERR(dma_addr)) - return PTR_ERR(dma_addr); + pagemap_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); + if (IS_ERR(pagemap_addr)) + return PTR_ERR(pagemap_addr); xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); @@ -1895,7 +2097,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, - dma_addr + current_page, + &pagemap_addr[current_page], vram_addr, write ? XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); @@ -1923,10 +2125,46 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, dma_fence_put(fence); out_err: - xe_migrate_dma_unmap(xe, dma_addr, len + page_offset, write); + xe_migrate_dma_unmap(xe, pagemap_addr, len + page_offset, write); return IS_ERR(fence) ? PTR_ERR(fence) : 0; } +/** + * xe_migrate_job_lock() - Lock migrate job lock + * @m: The migration context. + * @q: Queue associated with the operation which requires a lock + * + * Lock the migrate job lock if the queue is a migration queue, otherwise + * assert the VM's dma-resv is held (user queue's have own locking). + */ +void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q) +{ + bool is_migrate = q == m->q; + + if (is_migrate) + mutex_lock(&m->job_mutex); + else + xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ +} + +/** + * xe_migrate_job_unlock() - Unlock migrate job lock + * @m: The migration context. + * @q: Queue associated with the operation which requires a lock + * + * Unlock the migrate job lock if the queue is a migration queue, otherwise + * assert the VM's dma-resv is held (user queue's have own locking). 
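The lock/unlock pair added here is meant to bracket job submission: only the migrate context's own queue takes the job_mutex, while user queues rely on the VM dma-resv that the caller already holds. A sketch of the expected usage (the submission helper names are assumptions, not part of this hunk):

	xe_migrate_job_lock(m, q);	/* m->job_mutex only when q is the migrate queue */

	xe_sched_job_arm(job);
	/* collect the finished fence here if the caller needs one */
	xe_sched_job_push(job);

	xe_migrate_job_unlock(m, q);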
+ */ +void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q) +{ + bool is_migrate = q == m->q; + + if (is_migrate) + mutex_unlock(&m->job_mutex); + else + xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index fb9839c1bae0..4fad324b6253 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -9,11 +9,13 @@ #include <linux/types.h> struct dma_fence; +struct drm_pagemap_addr; struct iosys_map; struct ttm_resource; struct xe_bo; struct xe_gt; +struct xe_tlb_inval_job; struct xe_exec_queue; struct xe_migrate; struct xe_migrate_pt_update; @@ -24,6 +26,8 @@ struct xe_vm; struct xe_vm_pgtable_update; struct xe_vma; +enum xe_sriov_vf_ccs_rw_ctxs; + /** * struct xe_migrate_pt_update_ops - Callbacks for the * xe_migrate_update_pgtables() function. @@ -89,21 +93,30 @@ struct xe_migrate_pt_update { struct xe_vma_ops *vops; /** @job: The job if a GPU page-table update. NULL otherwise */ struct xe_sched_job *job; + /** + * @ijob: The TLB invalidation job for primary GT. NULL otherwise + */ + struct xe_tlb_inval_job *ijob; + /** + * @mjob: The TLB invalidation job for media GT. NULL otherwise + */ + struct xe_tlb_inval_job *mjob; /** @tile_id: Tile ID of the update */ u8 tile_id; }; -struct xe_migrate *xe_migrate_init(struct xe_tile *tile); +struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile); +int xe_migrate_init(struct xe_migrate *m); struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, - dma_addr_t *src_addr, + struct drm_pagemap_addr *src_addr, u64 dst_addr); struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - dma_addr_t *dst_addr); + struct drm_pagemap_addr *dst_addr); struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, @@ -112,6 +125,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, + struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write); + +struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate); +struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate); int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, unsigned long offset, void *buf, int len, int write); @@ -133,5 +152,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, void xe_migrate_wait(struct xe_migrate *m); -struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile); +void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q); +void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q); + #endif diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index e4db8d58ea2d..ef6f3ea573a2 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -58,7 +58,6 @@ static void tiles_fini(void *arg) static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) { struct xe_tile *tile; - struct xe_gt *gt; u8 id; /* @@ -68,38 +67,6 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) if (xe->info.tile_count == 1) return; - /* Possibly override number of tile based on configuration register */ - if (!xe->info.skip_mtcfg) { - struct xe_mmio *mmio = xe_root_tile_mmio(xe); - u8 tile_count, gt_count; - u32 mtcfg; - - /* - * 
Although the per-tile mmio regs are not yet initialized, this - * is fine as it's going to the root tile's mmio, that's - * guaranteed to be initialized earlier in xe_mmio_probe_early() - */ - mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); - tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; - - if (tile_count < xe->info.tile_count) { - drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); - xe->info.tile_count = tile_count; - - /* - * We've already setup gt_count according to the full - * tile count. Re-calculate it to only include the GTs - * that belong to the remaining tile(s). - */ - gt_count = 0; - for_each_gt(gt, xe, id) - if (gt->info.id < tile_count * xe->info.max_gt_per_tile) - gt_count++; - xe->info.gt_count = gt_count; - } - } - for_each_remote_tile(tile, xe, id) xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M); } diff --git a/drivers/gpu/drm/xe/xe_mmio_gem.c b/drivers/gpu/drm/xe/xe_mmio_gem.c new file mode 100644 index 000000000000..9a97c4387e4f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio_gem.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_mmio_gem.h" + +#include <drm/drm_drv.h> +#include <drm/drm_gem.h> +#include <drm/drm_managed.h> + +#include "xe_device_types.h" + +/** + * DOC: Exposing MMIO regions to userspace + * + * In certain cases, the driver may allow userspace to mmap a portion of the hardware registers. + * + * This can be done as follows: + * 1. Call xe_mmio_gem_create() to create a GEM object with an mmap-able fake offset. + * 2. Use xe_mmio_gem_mmap_offset() on the created GEM object to retrieve the fake offset. + * 3. Provide the fake offset to userspace. + * 4. Userspace can call mmap with the fake offset. The length provided to mmap + * must match the size of the GEM object. + * 5. When the region is no longer needed, call xe_mmio_gem_destroy() to release the GEM object. + * + * NOTE: The exposed MMIO region must be page-aligned with regards to its BAR offset and size. + * + * WARNING: Exposing MMIO regions to userspace can have security and stability implications. + * Make sure not to expose any sensitive registers. + */ + +static void xe_mmio_gem_free(struct drm_gem_object *); +static int xe_mmio_gem_mmap(struct drm_gem_object *, struct vm_area_struct *); +static vm_fault_t xe_mmio_gem_vm_fault(struct vm_fault *); + +struct xe_mmio_gem { + struct drm_gem_object base; + phys_addr_t phys_addr; +}; + +static const struct vm_operations_struct vm_ops = { + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, + .fault = xe_mmio_gem_vm_fault, +}; + +static const struct drm_gem_object_funcs xe_mmio_gem_funcs = { + .free = xe_mmio_gem_free, + .mmap = xe_mmio_gem_mmap, + .vm_ops = &vm_ops, +}; + +static inline struct xe_mmio_gem *to_xe_mmio_gem(struct drm_gem_object *obj) +{ + return container_of(obj, struct xe_mmio_gem, base); +} + +/** + * xe_mmio_gem_create - Expose an MMIO region to userspace + * @xe: The xe device + * @file: DRM file descriptor + * @phys_addr: Start of the exposed MMIO region + * @size: The size of the exposed MMIO region + * + * This function creates a GEM object that exposes an MMIO region with an mmap-able + * fake offset. 
+ * + * See: "Exposing MMIO regions to userspace" + */ +struct xe_mmio_gem *xe_mmio_gem_create(struct xe_device *xe, struct drm_file *file, + phys_addr_t phys_addr, size_t size) +{ + struct xe_mmio_gem *obj; + struct drm_gem_object *base; + int err; + + if ((phys_addr % PAGE_SIZE != 0) || (size % PAGE_SIZE != 0)) + return ERR_PTR(-EINVAL); + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return ERR_PTR(-ENOMEM); + + base = &obj->base; + base->funcs = &xe_mmio_gem_funcs; + obj->phys_addr = phys_addr; + + drm_gem_private_object_init(&xe->drm, base, size); + + err = drm_gem_create_mmap_offset(base); + if (err) + goto free_gem; + + err = drm_vma_node_allow(&base->vma_node, file); + if (err) + goto free_gem; + + return obj; + +free_gem: + xe_mmio_gem_free(base); + return ERR_PTR(err); +} + +/** + * xe_mmio_gem_mmap_offset - Return the mmap-able fake offset + * @gem: the GEM object created with xe_mmio_gem_create() + * + * This function returns the mmap-able fake offset allocated during + * xe_mmio_gem_create(). + * + * See: "Exposing MMIO regions to userspace" + */ +u64 xe_mmio_gem_mmap_offset(struct xe_mmio_gem *gem) +{ + return drm_vma_node_offset_addr(&gem->base.vma_node); +} + +static void xe_mmio_gem_free(struct drm_gem_object *base) +{ + struct xe_mmio_gem *obj = to_xe_mmio_gem(base); + + drm_gem_object_release(base); + kfree(obj); +} + +/** + * xe_mmio_gem_destroy - Destroy the GEM object that exposes an MMIO region + * @gem: the GEM object to destroy + * + * This function releases resources associated with the GEM object created by + * xe_mmio_gem_create(). + * + * See: "Exposing MMIO regions to userspace" + */ +void xe_mmio_gem_destroy(struct xe_mmio_gem *gem) +{ + xe_mmio_gem_free(&gem->base); +} + +static int xe_mmio_gem_mmap(struct drm_gem_object *base, struct vm_area_struct *vma) +{ + if (vma->vm_end - vma->vm_start != base->size) + return -EINVAL; + + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + + /* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 */ + vma->vm_pgoff = 0; + vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags)); + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | + VM_DONTCOPY | VM_NORESERVE); + + /* Defer actual mapping to the fault handler. 
*/ + return 0; +} + +static void xe_mmio_gem_release_dummy_page(struct drm_device *dev, void *res) +{ + __free_page((struct page *)res); +} + +static vm_fault_t xe_mmio_gem_vm_fault_dummy_page(struct vm_area_struct *vma) +{ + struct drm_gem_object *base = vma->vm_private_data; + struct drm_device *dev = base->dev; + vm_fault_t ret = VM_FAULT_NOPAGE; + struct page *page; + unsigned long pfn; + unsigned long i; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return VM_FAULT_OOM; + + if (drmm_add_action_or_reset(dev, xe_mmio_gem_release_dummy_page, page)) + return VM_FAULT_OOM; + + pfn = page_to_pfn(page); + + /* Map the entire VMA to the same dummy page */ + for (i = 0; i < base->size; i += PAGE_SIZE) { + unsigned long addr = vma->vm_start + i; + + ret = vmf_insert_pfn(vma, addr, pfn); + if (ret & VM_FAULT_ERROR) + break; + } + + return ret; +} + +static vm_fault_t xe_mmio_gem_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *base = vma->vm_private_data; + struct xe_mmio_gem *obj = to_xe_mmio_gem(base); + struct drm_device *dev = base->dev; + vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long i; + int idx; + + if (!drm_dev_enter(dev, &idx)) { + /* + * Provide a dummy page to avoid SIGBUS for events such as hot-unplug. + * This gives the userspace the option to recover instead of crashing. + * It is assumed the userspace will receive the notification via some + * other channel (e.g. drm uevent). + */ + return xe_mmio_gem_vm_fault_dummy_page(vma); + } + + for (i = 0; i < base->size; i += PAGE_SIZE) { + unsigned long addr = vma->vm_start + i; + unsigned long phys_addr = obj->phys_addr + i; + + ret = vmf_insert_pfn(vma, addr, PHYS_PFN(phys_addr)); + if (ret & VM_FAULT_ERROR) + break; + } + + drm_dev_exit(idx); + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_mmio_gem.h b/drivers/gpu/drm/xe/xe_mmio_gem.h new file mode 100644 index 000000000000..4b76d5586ebb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio_gem.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_MMIO_GEM_H_ +#define _XE_MMIO_GEM_H_ + +#include <linux/types.h> + +struct drm_file; +struct xe_device; +struct xe_mmio_gem; + +struct xe_mmio_gem *xe_mmio_gem_create(struct xe_device *xe, struct drm_file *file, + phys_addr_t phys_addr, size_t size); +u64 xe_mmio_gem_mmap_offset(struct xe_mmio_gem *gem); +void xe_mmio_gem_destroy(struct xe_mmio_gem *gem); + +#endif /* _XE_MMIO_GEM_H_ */ diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index d9391bd08194..d08338fc3bc1 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -135,24 +135,17 @@ static const struct init_funcs init_funcs[] = { }, }; -static int __init xe_call_init_func(unsigned int i) +static int __init xe_call_init_func(const struct init_funcs *func) { - if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) - return 0; - if (!init_funcs[i].init) - return 0; - - return init_funcs[i].init(); + if (func->init) + return func->init(); + return 0; } -static void xe_call_exit_func(unsigned int i) +static void xe_call_exit_func(const struct init_funcs *func) { - if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) - return; - if (!init_funcs[i].exit) - return; - - init_funcs[i].exit(); + if (func->exit) + func->exit(); } static int __init xe_init(void) @@ -160,10 +153,12 @@ static int __init xe_init(void) int err, i; for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = xe_call_init_func(i); + err = 
xe_call_init_func(init_funcs + i); if (err) { + pr_info("%s: module_init aborted at %ps %pe\n", + DRIVER_NAME, init_funcs[i].init, ERR_PTR(err)); while (i--) - xe_call_exit_func(i); + xe_call_exit_func(init_funcs + i); return err; } } @@ -176,7 +171,7 @@ static void __exit xe_exit(void) int i; for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) - xe_call_exit_func(i); + xe_call_exit_func(init_funcs + i); } module_init(xe_init); diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 61b0a1531a53..9ca4a5ae1693 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -39,17 +39,17 @@ static void xe_nvm_release_dev(struct device *dev) static bool xe_nvm_non_posted_erase(struct xe_device *xe) { - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); if (xe->info.platform != XE_BATTLEMAGE) return false; - return !(xe_mmio_read32(>->mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & + return !(xe_mmio_read32(mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & NVM_NON_POSTED_ERASE_CHICKEN_BIT); } static bool xe_nvm_writable_override(struct xe_device *xe) { - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); bool writable_override; resource_size_t base; @@ -72,7 +72,7 @@ static bool xe_nvm_writable_override(struct xe_device *xe) } writable_override = - !(xe_mmio_read32(>->mmio, HECI_FWSTS2(base)) & + !(xe_mmio_read32(mmio, HECI_FWSTS2(base)) & HECI_FW_STATUS_2_NVM_ACCESS_MODE); if (writable_override) drm_info(&xe->drm, "NVM access overridden by jumper\n"); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 5729e7d3e335..a188bad172ad 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -822,7 +822,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) u32 sqcnt1; /* Enable thread stall DOP gating and EU DOP gating. */ - if (XE_WA(stream->gt, 1508761755)) { + if (XE_GT_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -1079,7 +1079,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) * EU NOA signals behave incorrectly if EU clock gating is enabled. * Disable thread stall DOP gating and EU DOP gating. */ - if (XE_WA(stream->gt, 1508761755)) { + if (XE_GT_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -1754,7 +1754,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, * GuC reset of engines causes OA to lose configuration * state. Prevent this by overriding GUCRC mode. 
*/ - if (XE_WA(stream->gt, 1509372804)) { + if (XE_GT_WA(stream->gt, 1509372804)) { ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc, SLPC_GUCRC_MODE_GUCRC_NO_RC6); if (ret) @@ -1886,7 +1886,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt) { u32 reg, shift; - if (XE_WA(gt, 18013179988) || XE_WA(gt, 14015568240)) { + if (XE_GT_WA(gt, 18013179988) || XE_GT_WA(gt, 14015568240)) { xe_pm_runtime_get(gt_to_xe(gt)); reg = xe_mmio_read32(>->mmio, RPM_CONFIG0); xe_pm_runtime_put(gt_to_xe(gt)); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3c40ef426f0c..046d330bad34 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -17,6 +17,8 @@ #include "display/xe_display.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_drv.h" #include "xe_gt.h" @@ -55,7 +57,7 @@ static const struct xe_graphics_desc graphics_xelp = { }; #define XE_HP_FEATURES \ - .has_range_tlb_invalidation = true, \ + .has_range_tlb_inval = true, \ .va_bits = 48, \ .vm_max_level = 3 @@ -103,7 +105,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_range_tlb_invalidation = 1, \ + .has_range_tlb_inval = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ .va_bits = 48, \ @@ -169,6 +171,7 @@ static const struct xe_device_desc tgl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -193,6 +196,7 @@ static const struct xe_device_desc adl_s_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { @@ -210,6 +214,7 @@ static const struct xe_device_desc adl_p_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { @@ -225,6 +230,7 @@ static const struct xe_device_desc adl_n_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -270,6 +276,7 @@ static const struct xe_device_desc ats_m_desc = { DG2_FEATURES, .has_display = false, + .has_sriov = true, }; static const struct xe_device_desc dg2_desc = { @@ -599,6 +606,44 @@ static int xe_info_init_early(struct xe_device *xe, } /* + * Possibly override number of tile based on configuration register. + */ +static void xe_info_probe_tile_count(struct xe_device *xe) +{ + struct xe_mmio *mmio; + u8 tile_count; + u32 mtcfg; + + KUNIT_STATIC_STUB_REDIRECT(xe_info_probe_tile_count, xe); + + /* + * Probe for tile count only for platforms that support multiple + * tiles. 
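For example, on a SKU whose descriptor declares two tiles but whose MTCFG TILE_COUNT field reads 0 (a single enabled tile), xe_info_probe_tile_count() lowers info.tile_count to 1; the remote-tile loop that follows then has nothing to initialize, and the final for_each_gt() pass derives gt_count from the one remaining tile.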
+ */ + if (xe->info.tile_count == 1) + return; + + if (xe->info.skip_mtcfg) + return; + + mmio = xe_root_tile_mmio(xe); + + /* + * Although the per-tile mmio regs are not yet initialized, this + * is fine as it's going to the root tile's mmio, that's + * guaranteed to be initialized earlier in xe_mmio_probe_early() + */ + mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); + tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + + if (tile_count < xe->info.tile_count) { + drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", + xe->info.tile_count, tile_count); + xe->info.tile_count = tile_count; + } +} + +/* * Initialize device info content that does require knowledge about * graphics / media IP version. * Make sure that GT / tile structures allocated by the driver match the data @@ -668,10 +713,12 @@ static int xe_info_init(struct xe_device *xe, /* Runtime detection may change this later */ xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; - xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; + xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval; xe->info.has_usm = graphics_desc->has_usm; xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; + xe_info_probe_tile_count(xe); + for_each_remote_tile(tile, xe, id) { int err; @@ -687,12 +734,17 @@ static int xe_info_init(struct xe_device *xe, * All of these together determine the overall GT count. */ for_each_tile(tile, xe, id) { + int err; + gt = tile->primary_gt; gt->info.type = XE_GT_TYPE_MAIN; gt->info.id = tile->id * xe->info.max_gt_per_tile; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.engine_mask = graphics_desc->hw_engine_mask; - xe->info.gt_count++; + + err = xe_tile_alloc_vram(tile); + if (err) + return err; if (MEDIA_VER(xe) < 13 && media_desc) gt->info.engine_mask |= media_desc->hw_engine_mask; @@ -713,9 +765,15 @@ static int xe_info_init(struct xe_device *xe, gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; gt->info.engine_mask = media_desc->hw_engine_mask; - xe->info.gt_count++; } + /* + * Now that we have tiles and GTs defined, let's loop over valid GTs + * in order to define gt_count. + */ + for_each_gt(gt, xe, id) + xe->info.gt_count++; + return 0; } @@ -726,7 +784,7 @@ static void xe_pci_remove(struct pci_dev *pdev) if (IS_SRIOV_PF(xe)) xe_pci_sriov_configure(pdev, 0); - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return; xe_device_remove(xe); @@ -759,6 +817,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct xe_device *xe; int err; + xe_configfs_check_device(pdev); + if (desc->require_force_probe && !id_forced(pdev->device)) { dev_info(&pdev->dev, "Your graphics device %04x is not officially supported\n" @@ -806,7 +866,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * flashed through mei. 
Return success, if survivability mode * is enabled due to pcode failure or configfs being set */ - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return 0; if (err) @@ -900,7 +960,7 @@ static int xe_pci_suspend(struct device *dev) struct xe_device *xe = pdev_to_xe_device(pdev); int err; - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return -EBUSY; err = xe_pm_suspend(xe); diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 4de6f69ed975..b63002fc0f67 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -60,7 +60,7 @@ struct xe_graphics_desc { u8 has_atomic_enable_pte_bit:1; u8 has_flat_ccs:1; u8 has_indirect_ring_state:1; - u8 has_range_tlb_invalidation:1; + u8 has_range_tlb_inval:1; u8 has_usm:1; u8 has_64bit_timestamp:1; }; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index e279b47ba03b..6eea4190bbd2 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -18,12 +18,14 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" -#include "xe_guc.h" +#include "xe_gt_idle.h" #include "xe_i2c.h" #include "xe_irq.h" #include "xe_pcode.h" #include "xe_pxp.h" +#include "xe_sriov_vf_ccs.h" #include "xe_trace.h" +#include "xe_vm.h" #include "xe_wa.h" /** @@ -176,6 +178,9 @@ int xe_pm_resume(struct xe_device *xe) drm_dbg(&xe->drm, "Resuming device\n"); trace_xe_pm_resume(xe, __builtin_return_address(0)); + for_each_gt(gt, xe, id) + xe_gt_idle_disable_c6(gt); + for_each_tile(tile, xe, id) xe_wa_apply_tile_workarounds(tile); @@ -208,6 +213,9 @@ int xe_pm_resume(struct xe_device *xe) xe_pxp_pm_resume(xe->pxp); + if (IS_SRIOV_VF(xe)) + xe_sriov_vf_ccs_register_context(xe); + drm_dbg(&xe->drm, "Device resumed\n"); return 0; err: @@ -243,6 +251,10 @@ static void xe_pm_runtime_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; + /* Our current VFs do not support RPM. so, disable it */ + if (IS_SRIOV_VF(xe)) + return; + /* * Disable the system suspend direct complete optimization. * We need to ensure that the regular device suspend/resume functions @@ -290,6 +302,19 @@ static u32 vram_threshold_value(struct xe_device *xe) return DEFAULT_VRAM_THRESHOLD; } +static void xe_pm_wake_rebind_workers(struct xe_device *xe) +{ + struct xe_vm *vm, *next; + + mutex_lock(&xe->rebind_resume_lock); + list_for_each_entry_safe(vm, next, &xe->rebind_resume_list, + preempt.pm_activate_link) { + list_del_init(&vm->preempt.pm_activate_link); + xe_vm_resume_rebind_worker(vm); + } + mutex_unlock(&xe->rebind_resume_lock); +} + static int xe_pm_notifier_callback(struct notifier_block *nb, unsigned long action, void *data) { @@ -299,30 +324,30 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: + reinit_completion(&xe->pm_block); xe_pm_runtime_get(xe); err = xe_bo_evict_all_user(xe); - if (err) { + if (err) drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); - xe_pm_runtime_put(xe); - break; - } err = xe_bo_notifier_prepare_all_pinned(xe); - if (err) { + if (err) drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); - xe_pm_runtime_put(xe); - } + /* + * Keep the runtime pm reference until post hibernation / post suspend to + * avoid a runtime suspend interfering with evicted objects or backup + * allocations. 
+ */ break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: + complete_all(&xe->pm_block); + xe_pm_wake_rebind_workers(xe); xe_bo_notifier_unprepare_all_pinned(xe); xe_pm_runtime_put(xe); break; } - if (err) - return NOTIFY_BAD; - return NOTIFY_DONE; } @@ -344,6 +369,14 @@ int xe_pm_init(struct xe_device *xe) if (err) return err; + err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock); + if (err) + goto err_unregister; + + init_completion(&xe->pm_block); + complete_all(&xe->pm_block); + INIT_LIST_HEAD(&xe->rebind_resume_list); + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) return 0; @@ -367,6 +400,10 @@ static void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; + /* Our current VFs do not support RPM. so, disable it */ + if (IS_SRIOV_VF(xe)) + return; + pm_runtime_get_sync(dev); pm_runtime_forbid(dev); } @@ -525,6 +562,9 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_rpm_lockmap_acquire(xe); + for_each_gt(gt, xe, id) + xe_gt_idle_disable_c6(gt); + if (xe->d3cold.allowed) { err = xe_pcode_ready(xe, true); if (err) @@ -558,6 +598,9 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_pxp_pm_resume(xe->pxp); + if (IS_SRIOV_VF(xe)) + xe_sriov_vf_ccs_register_context(xe); + out: xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c new file mode 100644 index 000000000000..a2c9ff5bfd59 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_psmi.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_configfs.h" +#include "xe_psmi.h" + +/* + * PSMI capture support + * + * Requirement for PSMI capture is to have a physically contiguous buffer. The + * PSMI tool owns doing all necessary configuration (MMIO register writes are + * done from user-space). However, KMD needs to provide the PSMI tool with the + * required physical address of the base of PSMI buffer in case of VRAM. + * + * VRAM backed PSMI buffer: + * Buffer is allocated as GEM object and with XE_BO_CREATE_PINNED_BIT flag which + * creates a contiguous allocation. The physical address is returned from + * psmi_debugfs_capture_addr_show(). PSMI tool can mmap the buffer via the + * PCIBAR through sysfs. + * + * SYSTEM memory backed PSMI buffer: + * Interface here does not support allocating from SYSTEM memory region. The + * PSMI tool needs to allocate memory themselves using hugetlbfs. In order to + * get the physical address, user-space can query /proc/[pid]/pagemap. As an + * alternative, CMA debugfs could also be used to allocate reserved CMA memory. + */ + +static bool psmi_enabled(struct xe_device *xe) +{ + return xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev)); +} + +static void psmi_free_object(struct xe_bo *bo) +{ + xe_bo_lock(bo, NULL); + xe_bo_unpin(bo); + xe_bo_unlock(bo); + xe_bo_put(bo); +} + +/* + * Free PSMI capture buffer objects. 
+ */ +static void psmi_cleanup(struct xe_device *xe) +{ + unsigned long id, region_mask = xe->psmi.region_mask; + struct xe_bo *bo; + + for_each_set_bit(id, ®ion_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = xe->psmi.capture_obj[id]; + if (bo) { + psmi_free_object(bo); + xe->psmi.capture_obj[id] = NULL; + } + } +} + +static struct xe_bo *psmi_alloc_object(struct xe_device *xe, + unsigned int id, size_t bo_size) +{ + struct xe_bo *bo = NULL; + struct xe_tile *tile; + int err; + + if (!id || !bo_size) + return NULL; + + tile = &xe->tiles[id - 1]; + + /* VRAM: Allocate GEM object for the capture buffer */ + bo = xe_bo_create_locked(xe, tile, NULL, bo_size, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_NEEDS_CPU_ACCESS); + + if (!IS_ERR(bo)) { + /* Buffer written by HW, ensure stays resident */ + err = xe_bo_pin(bo); + if (err) + bo = ERR_PTR(err); + xe_bo_unlock(bo); + } + + return bo; +} + +/* + * Allocate PSMI capture buffer objects (via debugfs set function), based on + * which regions the user has selected in region_mask. @size: size in bytes + * (should be power of 2) + * + * Always release/free the current buffer objects before attempting to allocate + * new ones. Size == 0 will free all current buffers. + * + * Note, we don't write any registers as the capture tool is already configuring + * all PSMI registers itself via mmio space. + */ +static int psmi_resize_object(struct xe_device *xe, size_t size) +{ + unsigned long id, region_mask = xe->psmi.region_mask; + struct xe_bo *bo = NULL; + int err = 0; + + /* if resizing, free currently allocated buffers first */ + psmi_cleanup(xe); + + /* can set size to 0, in which case, now done */ + if (!size) + return 0; + + for_each_set_bit(id, ®ion_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = psmi_alloc_object(xe, id, size); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + break; + } + xe->psmi.capture_obj[id] = bo; + + drm_info(&xe->drm, + "PSMI capture size requested: %zu bytes, allocated: %lu:%zu\n", + size, id, bo ? xe_bo_size(bo) : 0); + } + + /* on error, reverse what was allocated */ + if (err) + psmi_cleanup(xe); + + return err; +} + +/* + * Returns an address for the capture tool to use to find start of capture + * buffer. Capture tool requires the capability to have a buffer allocated per + * each tile (VRAM region), thus we return an address for each region. + */ +static int psmi_debugfs_capture_addr_show(struct seq_file *m, void *data) +{ + struct xe_device *xe = m->private; + unsigned long id, region_mask; + struct xe_bo *bo; + u64 val; + + region_mask = xe->psmi.region_mask; + for_each_set_bit(id, ®ion_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + /* VRAM region */ + bo = xe->psmi.capture_obj[id]; + if (!bo) + continue; + + /* pinned, so don't need bo_lock */ + val = __xe_bo_addr(bo, 0, PAGE_SIZE); + seq_printf(m, "%ld: 0x%llx\n", id, val); + } + + return 0; +} + +/* + * Return capture buffer size, using the size from first allocated object that + * is found. This works because all objects must be of the same size. 
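From the capture tool's side, the three debugfs files registered below are used in a fixed order: select the VRAM regions, size the buffers, then read back the per-region base address. A hypothetical userspace sketch; the debugfs directory for the card (here /sys/kernel/debug/dri/0) is an assumption:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define PSMI_DIR "/sys/kernel/debug/dri/0"	/* assumed DRM debugfs root */

static void psmi_write(const char *file, const char *val)
{
	char path[128];
	int fd;

	snprintf(path, sizeof(path), PSMI_DIR "/%s", file);
	fd = open(path, O_WRONLY);
	if (fd < 0 || write(fd, val, strlen(val)) < 0)
		perror(file);
	if (fd >= 0)
		close(fd);
}

int main(void)
{
	char addr[128] = "";
	int fd;

	psmi_write("psmi_capture_region_mask", "0x2");	/* tile 0 VRAM; bit 0 (SMEM) is rejected */
	psmi_write("psmi_capture_size", "67108864");	/* 64 MiB, power of two */

	fd = open(PSMI_DIR "/psmi_capture_addr", O_RDONLY);
	if (fd >= 0) {
		if (read(fd, addr, sizeof(addr) - 1) < 0)
			addr[0] = '\0';
		close(fd);
	}
	printf("capture buffer base: %s", addr);
	return 0;
}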
+ */ +static int psmi_debugfs_capture_size_get(void *data, u64 *val) +{ + unsigned long id, region_mask; + struct xe_device *xe = data; + struct xe_bo *bo; + + region_mask = xe->psmi.region_mask; + for_each_set_bit(id, ®ion_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = xe->psmi.capture_obj[id]; + if (bo) { + *val = xe_bo_size(bo); + return 0; + } + } + + /* no capture objects are allocated */ + *val = 0; + + return 0; +} + +/* + * Set size of PSMI capture buffer. This triggers the allocation of capture + * buffer in each memory region as specified with prior write to + * psmi_capture_region_mask. + */ +static int psmi_debugfs_capture_size_set(void *data, u64 val) +{ + struct xe_device *xe = data; + + /* user must have specified at least one region */ + if (!xe->psmi.region_mask) + return -EINVAL; + + return psmi_resize_object(xe, val); +} + +static int psmi_debugfs_capture_region_mask_get(void *data, u64 *val) +{ + struct xe_device *xe = data; + + *val = xe->psmi.region_mask; + + return 0; +} + +/* + * Select VRAM regions for multi-tile devices, only allowed when buffer is not + * currently allocated. + */ +static int psmi_debugfs_capture_region_mask_set(void *data, u64 region_mask) +{ + struct xe_device *xe = data; + u64 size = 0; + + /* SMEM is not supported (see comments at top of file) */ + if (region_mask & 0x1) + return -EOPNOTSUPP; + + /* input bitmask should contain only valid TTM regions */ + if (!region_mask || region_mask & ~xe->info.mem_region_mask) + return -EINVAL; + + /* only allow setting mask if buffer is not yet allocated */ + psmi_debugfs_capture_size_get(xe, &size); + if (size) + return -EBUSY; + + xe->psmi.region_mask = region_mask; + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(psmi_debugfs_capture_addr); + +DEFINE_DEBUGFS_ATTRIBUTE(psmi_debugfs_capture_region_mask_fops, + psmi_debugfs_capture_region_mask_get, + psmi_debugfs_capture_region_mask_set, + "0x%llx\n"); + +DEFINE_DEBUGFS_ATTRIBUTE(psmi_debugfs_capture_size_fops, + psmi_debugfs_capture_size_get, + psmi_debugfs_capture_size_set, + "%lld\n"); + +void xe_psmi_debugfs_register(struct xe_device *xe) +{ + struct drm_minor *minor; + + if (!psmi_enabled(xe)) + return; + + minor = xe->drm.primary; + if (!minor->debugfs_root) + return; + + debugfs_create_file("psmi_capture_addr", + 0400, minor->debugfs_root, xe, + &psmi_debugfs_capture_addr_fops); + + debugfs_create_file("psmi_capture_region_mask", + 0600, minor->debugfs_root, xe, + &psmi_debugfs_capture_region_mask_fops); + + debugfs_create_file("psmi_capture_size", + 0600, minor->debugfs_root, xe, + &psmi_debugfs_capture_size_fops); +} + +static void psmi_fini(void *arg) +{ + psmi_cleanup(arg); +} + +int xe_psmi_init(struct xe_device *xe) +{ + if (!psmi_enabled(xe)) + return 0; + + return devm_add_action(xe->drm.dev, psmi_fini, xe); +} diff --git a/drivers/gpu/drm/xe/xe_psmi.h b/drivers/gpu/drm/xe/xe_psmi.h new file mode 100644 index 000000000000..b1dfba80d893 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_psmi.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PSMI_H_ +#define _XE_PSMI_H_ + +struct xe_device; + +int xe_psmi_init(struct xe_device *xe); +void xe_psmi_debugfs_register(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index c8e63bd23300..c129048a9a09 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -13,7 +13,7 @@ #include "xe_drm_client.h" #include 
"xe_exec_queue.h" #include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" +#include "xe_tlb_inval_job.h" #include "xe_migrate.h" #include "xe_pt_types.h" #include "xe_pt_walk.h" @@ -21,6 +21,7 @@ #include "xe_sched_job.h" #include "xe_sync.h" #include "xe_svm.h" +#include "xe_tlb_inval_job.h" #include "xe_trace.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" @@ -69,7 +70,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, if (level > MAX_HUGEPTE_LEVEL) return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); + 0); return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | XE_PTE_NULL; @@ -518,7 +519,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, { struct xe_pt_stage_bind_walk *xe_walk = container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; + u16 pat_index = xe_walk->vma->attr.pat_index; struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); struct xe_vm *vm = xe_walk->vm; struct xe_pt *xe_child; @@ -616,7 +617,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, xe_child->is_compact = true; } - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; + pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0) | flags; ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, pte); } @@ -640,28 +641,31 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { * - In all other cases device atomics will be disabled with AE=0 until an application * request differently using a ioctl like madvise. */ -static bool xe_atomic_for_vram(struct xe_vm *vm) +static bool xe_atomic_for_vram(struct xe_vm *vm, struct xe_vma *vma) { + if (vma->attr.atomic_access == DRM_XE_ATOMIC_CPU) + return false; + return true; } -static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo) +static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_vma *vma) { struct xe_device *xe = vm->xe; + struct xe_bo *bo = xe_vma_bo(vma); - if (!xe->info.has_device_atomics_on_smem) + if (!xe->info.has_device_atomics_on_smem || + vma->attr.atomic_access == DRM_XE_ATOMIC_CPU) return false; + if (vma->attr.atomic_access == DRM_XE_ATOMIC_DEVICE) + return true; + /* * If a SMEM+LMEM allocation is backed by SMEM, a device * atomics will cause a gpu page fault and which then * gets migrated to LMEM, bind such allocations with * device atomics enabled. - * - * TODO: Revisit this. Perhaps add something like a - * fault_on_atomics_in_system UAPI flag. - * Note that this also prohibits GPU atomics in LR mode for - * userptr and system memory on DGFX. */ return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) || (bo && xe_bo_has_single_placement(bo)))); @@ -744,8 +748,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, goto walk_pt; if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0; - xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ? + xe_walk.default_vram_pte = xe_atomic_for_vram(vm, vma) ? XE_USM_PPGTT_PTE_AE : 0; + xe_walk.default_system_pte = xe_atomic_for_system(vm, vma) ? XE_USM_PPGTT_PTE_AE : 0; } @@ -950,7 +954,19 @@ bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt = vm->pt_root[tile->id]; u8 pt_mask = (range->tile_present & ~range->tile_invalidated); - xe_svm_assert_in_notifier(vm); + /* + * Locking rules: + * + * - notifier_lock (write): full protection against page table changes + * and MMU notifier invalidations. 
+ * + * - notifier_lock (read) + vm_lock (write): combined protection against + * invalidations and concurrent page table modifications. (e.g., madvise) + * + */ + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || + (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && + lockdep_is_held_type(&vm->lock, 0))); if (!(pt_mask & BIT(tile->id))) return false; @@ -1261,6 +1277,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, } static int xe_pt_vm_dependencies(struct xe_sched_job *job, + struct xe_tlb_inval_job *ijob, + struct xe_tlb_inval_job *mjob, struct xe_vm *vm, struct xe_vma_ops *vops, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -1328,6 +1346,20 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job); + if (job) { + if (ijob) { + err = xe_tlb_inval_job_alloc_dep(ijob); + if (err) + return err; + } + + if (mjob) { + err = xe_tlb_inval_job_alloc_dep(mjob); + if (err) + return err; + } + } + return err; } @@ -1339,7 +1371,8 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[pt_update->tile_id]; - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, + return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob, + pt_update->mjob, vm, pt_update->vops, pt_update_ops, rftree); } @@ -1509,75 +1542,6 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) } #endif -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - xe_gt_tlb_invalidation_fence_signal(&ifence->base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static void invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); -} - struct 
xe_pt_stage_unbind_walk { /** @base: The pagewalk base-class. */ struct xe_pt_walk base; @@ -2390,6 +2354,15 @@ static const struct xe_migrate_pt_update_ops svm_migrate_ops = { static const struct xe_migrate_pt_update_ops svm_migrate_ops; #endif +static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q, + struct xe_gt *gt) +{ + if (xe_gt_is_media_type(gt)) + return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT].dep_scheduler; + + return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT].dep_scheduler; +} + /** * xe_pt_update_ops_run() - Run PT update operations * @tile: Tile of PT update operations @@ -2407,8 +2380,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm *vm = vops->vm; struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; + struct dma_fence *fence, *ifence, *mfence; + struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct xe_range_fence *rfence; @@ -2440,26 +2413,45 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) #endif if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; + struct xe_exec_queue *q = pt_update_ops->q; + struct xe_dep_scheduler *dep_scheduler = + to_dep_scheduler(q, tile->primary_gt); + + ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval, + dep_scheduler, + pt_update_ops->start, + pt_update_ops->last, + vm->usm.asid); + if (IS_ERR(ijob)) { + err = PTR_ERR(ijob); goto kill_vm_tile1; } + update.ijob = ijob; + if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; + dep_scheduler = to_dep_scheduler(q, tile->media_gt); + + mjob = xe_tlb_inval_job_create(q, + &tile->media_gt->tlb_inval, + dep_scheduler, + pt_update_ops->start, + pt_update_ops->last, + vm->usm.asid); + if (IS_ERR(mjob)) { + err = PTR_ERR(mjob); + goto free_ijob; } + update.mjob = mjob; + fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); if (!fences) { err = -ENOMEM; - goto free_ifence; + goto free_ijob; } cf = dma_fence_array_alloc(2); if (!cf) { err = -ENOMEM; - goto free_ifence; + goto free_ijob; } } } @@ -2467,7 +2459,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); if (!rfence) { err = -ENOMEM; - goto free_ifence; + goto free_ijob; } fence = xe_migrate_update_pgtables(tile->migrate, &update); @@ -2491,30 +2483,31 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) pt_update_ops->last, fence)) dma_fence_wait(fence, false); - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; + /* tlb invalidation must be done before signaling unbind/rebind */ + if (ijob) { + struct dma_fence *__fence; + + ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence); + __fence = ifence; + + if (mjob) { + fences[0] = ifence; + mfence = xe_tlb_inval_job_push(mjob, tile->migrate, + fence); + fences[1] = mfence; + dma_fence_array_init(cf, 2, 
fences, vm->composite_fence_ctx, vm->composite_fence_seqno++, false); - fence = &cf->base; - } else { - fence = &ifence->base.base; + __fence = &cf->base; } + + dma_fence_put(fence); + fence = __fence; } - if (!mfence) { + if (!mjob) { dma_resv_add_fence(xe_vm_resv(vm), fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : @@ -2523,19 +2516,19 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) list_for_each_entry(op, &vops->list, link) op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, + dma_resv_add_fence(xe_vm_resv(vm), ifence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, + dma_resv_add_fence(xe_vm_resv(vm), mfence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); + op_commit(vops->vm, tile, pt_update_ops, op, ifence, + mfence); } if (pt_update_ops->needs_svm_lock) @@ -2543,15 +2536,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) if (pt_update_ops->needs_userptr_lock) up_read(&vm->userptr.notifier_lock); + xe_tlb_inval_job_put(mjob); + xe_tlb_inval_job_put(ijob); + return fence; free_rfence: kfree(rfence); -free_ifence: +free_ijob: kfree(cf); kfree(fences); - kfree(mfence); - kfree(ifence); + xe_tlb_inval_job_put(mjob); + xe_tlb_inval_job_put(ijob); kill_vm_tile1: if (err != -EAGAIN && err != -ENODATA && tile->id) xe_vm_kill(vops->vm, false); diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 69eab6f37cfe..17cdd7c7e9f5 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -45,8 +45,7 @@ struct xe_pt_ops { u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr, u16 pat_index, u32 pt_level, bool devmem, u64 flags); - u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset, - u16 pat_index); + u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset); }; struct xe_pt_entry { diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c index d92ec0f515b0..ca95f2a4d4ef 100644 --- a/drivers/gpu/drm/xe/xe_pxp_submit.c +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c @@ -101,7 +101,7 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, xe_assert(xe, hwe); /* PXP instructions must be issued from PPGTT */ - vm = xe_vm_create(xe, XE_VM_FLAG_GSC); + vm = xe_vm_create(xe, XE_VM_FLAG_GSC, NULL); if (IS_ERR(vm)) return PTR_ERR(vm); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index d517ec9ddcbf..4dbe5732cb7f 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -27,6 +27,7 @@ #include "xe_oa.h" #include "xe_pxp.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" #include "xe_wa.h" static const u16 xe_to_user_engine_class[] = { @@ -337,7 +338,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->num_params = num_params; config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = xe->info.devid | (xe->info.revid << 16); - if (xe_device_get_root_tile(xe)->mem.vram.usable_size) + if (xe->mem.vram) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) @@ -410,7 +411,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query 
gt_list->gt_list[iter].near_mem_regions = 0x1; else gt_list->gt_list[iter].near_mem_regions = - BIT(gt_to_tile(gt)->id) << 1; + BIT(gt_to_tile(gt)->mem.vram->id) << 1; gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ gt_list->gt_list[iter].near_mem_regions; @@ -476,7 +477,7 @@ static size_t calc_topo_query_size(struct xe_device *xe) sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss); /* L3bank mask may not be available for some GTs */ - if (!XE_WA(gt, no_media_l3)) + if (!XE_GT_WA(gt, no_media_l3)) query_size += sizeof(struct drm_xe_query_topology_mask) + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask); } @@ -539,7 +540,7 @@ static int query_gt_topology(struct xe_device *xe, * mask, then it's better to omit L3 from the query rather than * reporting bogus or zeroed information to userspace. */ - if (!XE_WA(gt, no_media_l3)) { + if (!XE_GT_WA(gt, no_media_l3)) { topo.type = DRM_XE_TOPO_L3_BANK; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, sizeof(gt->fuse_topo.l3_bank_mask)); @@ -748,10 +749,8 @@ static int query_eu_stall(struct xe_device *xe, u32 num_rates; int ret; - if (!xe_eu_stall_supported_on_platform(xe)) { - drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n"); + if (!xe_eu_stall_supported_on_platform(xe)) return -ENODEV; - } array_size = xe_eu_stall_get_sampling_rates(&num_rates, &rates); size = sizeof(struct drm_xe_query_eu_stall) + array_size; diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index d1a403cfb628..4e00008b7081 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -55,8 +55,8 @@ struct xe_res_cursor { u32 mem_type; /** @sgl: Scatterlist for cursor */ struct scatterlist *sgl; - /** @dma_addr: Current element in a struct drm_pagemap_device_addr array */ - const struct drm_pagemap_device_addr *dma_addr; + /** @dma_addr: Current element in a struct drm_pagemap_addr array */ + const struct drm_pagemap_addr *dma_addr; /** @mm: Buddy allocator for VRAM cursor */ struct drm_buddy *mm; /** @@ -170,7 +170,7 @@ static inline void __xe_res_sg_next(struct xe_res_cursor *cur) */ static inline void __xe_res_dma_next(struct xe_res_cursor *cur) { - const struct drm_pagemap_device_addr *addr = cur->dma_addr; + const struct drm_pagemap_addr *addr = cur->dma_addr; u64 start = cur->start; while (start >= cur->dma_seg_size) { @@ -222,14 +222,14 @@ static inline void xe_res_first_sg(const struct sg_table *sg, /** * xe_res_first_dma - initialize a xe_res_cursor with dma_addr array * - * @dma_addr: struct drm_pagemap_device_addr array to walk + * @dma_addr: struct drm_pagemap_addr array to walk * @start: Start of the range * @size: Size of the range * @cur: cursor object to initialize * * Start walking over the range of allocations between @start and @size. 
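A hedged sketch of how a caller might walk a drm_pagemap_addr array with this cursor; it assumes the existing xe_res_next()/xe_res_dma() helpers from this header and treats cur.size as the length of the current contiguous chunk, so take it as illustration rather than a verbatim call site.

static void walk_pagemap_addrs(const struct drm_pagemap_addr *addrs,
                               u64 start, u64 size)
{
        struct xe_res_cursor cur;
        u64 left = size;

        xe_res_first_dma(addrs, start, size, &cur);
        while (left) {
                u64 chunk = min_t(u64, left, cur.size);

                /* cur points at one physically contiguous DMA chunk */
                pr_debug("dma 0x%llx len 0x%llx\n", xe_res_dma(&cur), chunk);

                xe_res_next(&cur, chunk);
                left -= chunk;
        }
}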
*/ -static inline void xe_res_first_dma(const struct drm_pagemap_device_addr *dma_addr, +static inline void xe_res_first_dma(const struct drm_pagemap_addr *dma_addr, u64 start, u64 size, struct xe_res_cursor *cur) { diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 7b50c7c1ee21..d71837773d6c 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -110,10 +110,10 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 flush_flags, u32 *dw, int i) { - dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW; + dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_IMM_DW | (flush_flags & MI_INVALIDATE_TLB) ?: 0; dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; @@ -179,7 +179,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 flags; - if (XE_WA(gt, 14016712196)) + if (XE_GT_WA(gt, 14016712196)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); @@ -190,7 +190,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); - if (XE_WA(gt, 1409600907)) + if (XE_GT_WA(gt, 1409600907)) flags |= PIPE_CONTROL_DEPTH_STALL; if (lacks_render) @@ -206,7 +206,7 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int if (hwe->class != XE_ENGINE_CLASS_RENDER) return i; - if (XE_WA(hwe->gt, 16020292621)) + if (XE_GT_WA(hwe->gt, 16020292621)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC, RING_NOPID(hwe->mmio_base).addr, 0); @@ -410,16 +410,14 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(saddr, seqno, dw, i); + i = emit_flush_invalidate(saddr, seqno, job->migrate_flush_flags, dw, i); dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); - dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | - MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW; - dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT; - dw[i++] = 0; - dw[i++] = seqno; /* value */ + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, + job->migrate_flush_flags, + dw, i); i = emit_user_interrupt(dw, i); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 95571b87aa73..47ea1521dc80 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -9,6 +9,7 @@ #include <uapi/drm/xe_drm.h> +#include "xe_configfs.h" #include "xe_gt.h" #include "xe_gt_topology.h" #include "xe_macros.h" @@ -363,3 +364,9 @@ bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, { return !IS_SRIOV_VF(gt_to_xe(gt)); } + +bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_configfs_get_psmi_enabled(to_pci_dev(gt_to_xe(gt)->drm.dev)); +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 5ed6c14b9ae3..7951fefdbe04 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -477,4 +477,7 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, 
const struct xe_hw_engine *hwe); +bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 1d43e183ca21..fedd017d6dd3 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -69,7 +69,6 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3 } sa_manager->bo = bo; sa_manager->is_iomem = bo->vmap.is_iomem; - sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 1170ee5a81a8..99dbf0eea540 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -7,6 +7,8 @@ #include <linux/sizes.h> #include <linux/types.h> + +#include "xe_bo.h" #include "xe_sa_types.h" struct dma_fence; @@ -43,9 +45,20 @@ to_xe_sa_manager(struct drm_suballoc_manager *mng) return container_of(mng, struct xe_sa_manager, base); } +/** + * xe_sa_manager_gpu_addr - Retrieve GPU address of a back storage BO + * within suballocator. + * @sa_manager: the &xe_sa_manager struct instance + * Return: GGTT address of the back storage BO. + */ +static inline u64 xe_sa_manager_gpu_addr(struct xe_sa_manager *sa_manager) +{ + return xe_bo_ggtt_addr(sa_manager->bo); +} + static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa) { - return to_xe_sa_manager(sa->manager)->gpu_addr + + return xe_sa_manager_gpu_addr(to_xe_sa_manager(sa->manager)) + drm_suballoc_soffset(sa); } diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index 2b070ff1292e..cb7238799dcb 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -12,7 +12,6 @@ struct xe_bo; struct xe_sa_manager { struct drm_suballoc_manager base; struct xe_bo *bo; - u64 gpu_addr; void *cpu_ptr; bool is_iomem; }; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index a0eab44c0e76..87911fb4eea7 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -15,6 +15,7 @@ #include "xe_sriov.h" #include "xe_sriov_pf.h" #include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" /** * xe_sriov_mode_to_string - Convert enum value to string. @@ -157,3 +158,21 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size) strscpy(buf, "PF", size); return buf; } + +/** + * xe_sriov_late_init() - SR-IOV late initialization functions. + * @xe: the &xe_device to initialize + * + * On VF this function will initialize code for CCS migration. + * + * Return: 0 on success or a negative error code on failure. 
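A hypothetical caller, only to show the intended wiring of this helper into device bring-up (the real call site is elsewhere in the driver):

static int example_device_late_init(struct xe_device *xe)
{
        int err;

        /* no-op unless this is a VF that needs CCS save/restore setup */
        err = xe_sriov_late_init(xe);
        if (err)
                return err;

        return 0;
}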
+ */ +int xe_sriov_late_init(struct xe_device *xe) +{ + int err = 0; + + if (IS_VF_CCS_INIT_NEEDED(xe)) + err = xe_sriov_vf_ccs_init(xe); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 688fbabf08f1..0e0c1abf2d14 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -18,6 +18,7 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len); void xe_sriov_probe_early(struct xe_device *xe); void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); int xe_sriov_init(struct xe_device *xe); +int xe_sriov_late_init(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 26e243c28994..5de81f213d83 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -11,6 +11,9 @@ #include "xe_gt_sriov_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc_ct.h" +#include "xe_guc_submit.h" +#include "xe_irq.h" +#include "xe_lrc.h" #include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_printk.h" @@ -147,6 +150,56 @@ void xe_sriov_vf_init_early(struct xe_device *xe) xe_sriov_info(xe, "migration not supported by this module version\n"); } +/** + * vf_post_migration_shutdown - Stop the driver activities after VF migration. + * @xe: the &xe_device struct instance + * + * After this VM is migrated and assigned to a new VF, it is running on a new + * hardware, and therefore many hardware-dependent states and related structures + * require fixups. Without fixups, the hardware cannot do any work, and therefore + * all GPU pipelines are stalled. + * Stop some of kernel activities to make the fixup process faster. + */ +static void vf_post_migration_shutdown(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int ret = 0; + + for_each_gt(gt, xe, id) { + xe_guc_submit_pause(>->uc.guc); + ret |= xe_guc_submit_reset_block(>->uc.guc); + } + + if (ret) + drm_info(&xe->drm, "migration recovery encountered ongoing reset\n"); +} + +/** + * vf_post_migration_kickstart - Re-start the driver activities under new hardware. + * @xe: the &xe_device struct instance + * + * After we have finished with all post-migration fixups, restart the driver + * activities to continue feeding the GPU with workloads. + */ +static void vf_post_migration_kickstart(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + /* + * Make sure interrupts on the new HW are properly set. The GuC IRQ + * must be working at this point, since the recovery did started, + * but the rest was not enabled using the procedure from spec. + */ + xe_irq_resume(xe); + + for_each_gt(gt, xe, id) { + xe_guc_submit_reset_unblock(>->uc.guc); + xe_guc_submit_unpause(>->uc.guc); + } +} + static bool gt_vf_post_migration_needed(struct xe_gt *gt) { return test_bit(gt->info.id, >_to_xe(gt)->sriov.vf.migration.gt_flags); @@ -192,6 +245,11 @@ static int vf_get_next_migrated_gt_id(struct xe_device *xe) return -1; } +static size_t post_migration_scratch_size(struct xe_device *xe) +{ + return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE); +} + /** * Perform post-migration fixups on a single GT. 
* @@ -208,19 +266,31 @@ static int vf_get_next_migrated_gt_id(struct xe_device *xe) static int gt_vf_post_migration_fixups(struct xe_gt *gt) { s64 shift; + void *buf; int err; + buf = kmalloc(post_migration_scratch_size(gt_to_xe(gt)), GFP_KERNEL); + if (!buf) + return -ENOMEM; + err = xe_gt_sriov_vf_query_config(gt); if (err) - return err; + goto out; shift = xe_gt_sriov_vf_ggtt_shift(gt); if (shift) { xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); - /* FIXME: add the recovery steps */ + xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt); + err = xe_guc_contexts_hwsp_rebase(>->uc.guc, buf); + if (err) + goto out; + xe_guc_jobs_ring_rebase(>->uc.guc); xe_guc_ct_fixup_messages_with_ggtt(>->uc.guc.ct, shift); } - return 0; + +out: + kfree(buf); + return err; } static void vf_post_migration_recovery(struct xe_device *xe) @@ -230,6 +300,7 @@ static void vf_post_migration_recovery(struct xe_device *xe) drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); + vf_post_migration_shutdown(xe); if (!vf_migration_supported(xe)) { xe_sriov_err(xe, "migration not supported by this module version\n"); @@ -247,6 +318,7 @@ static void vf_post_migration_recovery(struct xe_device *xe) set_bit(id, &fixed_gts); } + vf_post_migration_kickstart(xe); err = vf_post_migration_notify_resfix_done(xe, fixed_gts); if (err) goto fail; diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c new file mode 100644 index 000000000000..4872e43eb440 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "instructions/xe_mi_commands.h" +#include "instructions/xe_gpu_commands.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_exec_queue_types.h" +#include "xe_guc_submit.h" +#include "xe_lrc.h" +#include "xe_migrate.h" +#include "xe_sa.h" +#include "xe_sriov_printk.h" +#include "xe_sriov_vf_ccs.h" +#include "xe_sriov_vf_ccs_types.h" + +/** + * DOC: VF save/restore of compression Meta Data + * + * VF KMD registers two special contexts/LRCAs. + * + * Save Context/LRCA: contain necessary cmds+page table to trigger Meta data / + * compression control surface (Aka CCS) save in regular System memory in VM. + * + * Restore Context/LRCA: contain necessary cmds+page table to trigger Meta data / + * compression control surface (Aka CCS) Restore from regular System memory in + * VM to corresponding CCS pool. + * + * Below diagram explain steps needed for VF save/Restore of compression Meta Data:: + * + * CCS Save CCS Restore VF KMD Guc BCS + * LRCA LRCA + * | | | | | + * | | | | | + * | Create Save LRCA | | | + * [ ]<----------------------------- [ ] | | + * | | | | | + * | | | | | + * | | | Register save LRCA | | + * | | | with Guc | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | Create restore LRCA | | | + * | [ ]<------------------[ ] | | + * | | | | | + * | | | Register restore LRCA | | + * | | | with Guc | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | | | | + * | | [ ]------------------------- | | + * | | [ ] Allocate main memory. | | | + * | | [ ] Allocate CCS memory. 
| | | + * | | [ ] Update Main memory & | | | + * [ ]<------------------------------[ ] CCS pages PPGTT + BB | | | + * | [ ]<------------------[ ] cmds to save & restore.| | | + * | | [ ]<------------------------ | | + * | | | | | + * | | | | | + * | | | | | + * : : : : : + * ---------------------------- VF Paused ------------------------------------- + * | | | | | + * | | | | | + * | | | |Schedule | + * | | | |CCS Save | + * | | | | LRCA | + * | | | [ ]------>[ ] + * | | | | | + * | | | | | + * | | | |CCS save | + * | | | |completed| + * | | | [ ]<------[ ] + * | | | | | + * : : : : : + * ---------------------------- VM Migrated ----------------------------------- + * | | | | | + * | | | | | + * : : : : : + * ---------------------------- VF Resumed ------------------------------------ + * | | | | | + * | | | | | + * | | [ ]-------------- | | + * | | [ ] Fix up GGTT | | | + * | | [ ]<------------- | | + * | | | | | + * | | | | | + * | | | Notify VF_RESFIX_DONE | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | | |Schedule | + * | | | |CCS | + * | | | |Restore | + * | | | |LRCA | + * | | | [ ]------>[ ] + * | | | | | + * | | | | | + * | | | |CCS | + * | | | |restore | + * | | | |completed| + * | | | [ ]<------[ ] + * | | | | | + * | | | | | + * | | | VF_RESFIX_DONE complete | | + * | | | notification | | + * | | [ ]<---------------------------[ ] | + * | | | | | + * | | | | | + * : : : : : + * ------------------------- Continue VM restore ------------------------------ + */ + +static u64 get_ccs_bb_pool_size(struct xe_device *xe) +{ + u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size; + struct sysinfo si; + + si_meminfo(&si); + sys_mem_size = si.totalram * si.mem_unit; + ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe)); + ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE); + + /** + * We need below BB size to hold PTE mappings and some DWs for copy + * command. In reality, we need space for many copy commands. So, let + * us allocate double the calculated size which is enough to holds GPU + * instructions for the whole region. + */ + bb_pool_size = ptes * sizeof(u32); + + return round_up(bb_pool_size * 2, SZ_1M); +} + +static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_sa_manager *sa_manager; + u64 bb_pool_size; + int offset, err; + + bb_pool_size = get_ccs_bb_pool_size(xe); + xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", + ctx->ctx_id ? 
"Restore" : "Save", bb_pool_size / SZ_1M); + + sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16); + + if (IS_ERR(sa_manager)) { + xe_sriov_err(xe, "Suballocator init failed with error: %pe\n", + sa_manager); + err = PTR_ERR(sa_manager); + return err; + } + + offset = 0; + xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP, + bb_pool_size); + + offset = bb_pool_size - sizeof(u32); + xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END); + + ctx->mem.ccs_bb_pool = sa_manager; + + return 0; +} + +static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx) +{ + u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); + u32 dw[10], i = 0; + + dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; + dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3); + dw[i++] = lower_32_bits(addr); + dw[i++] = upper_32_bits(addr); + dw[i++] = MI_NOOP; + dw[i++] = MI_NOOP; + + xe_lrc_write_ring(lrc, dw, i * sizeof(u32)); + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); +} + +static int register_save_restore_context(struct xe_tile_vf_ccs *ctx) +{ + int err = -EINVAL; + int ctx_type; + + switch (ctx->ctx_id) { + case XE_SRIOV_VF_CCS_READ_CTX: + ctx_type = GUC_CONTEXT_COMPRESSION_SAVE; + break; + case XE_SRIOV_VF_CCS_WRITE_CTX: + ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE; + break; + default: + return err; + } + + xe_guc_register_exec_queue(ctx->mig_q, ctx_type); + return 0; +} + +/** + * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc. + * @xe: the &xe_device to register contexts on. + * + * This function registers read and write contexts with Guc. Re-registration + * is needed whenever resuming from pm runtime suspend. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_sriov_vf_ccs_register_context(struct xe_device *xe) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_tile_vf_ccs *ctx; + int err; + + if (!IS_VF_CCS_READY(xe)) + return 0; + + for_each_ccs_rw_ctx(ctx_id) { + ctx = &tile->sriov.vf.ccs[ctx_id]; + err = register_save_restore_context(ctx); + if (err) + return err; + } + + return err; +} + +static void xe_sriov_vf_ccs_fini(void *arg) +{ + struct xe_tile_vf_ccs *ctx = arg; + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); + + /* + * Make TAIL = HEAD in the ring so that no issues are seen if Guc + * submits this context to HW on VF pause after unbinding device. + */ + xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc)); + xe_exec_queue_put(ctx->mig_q); +} + +/** + * xe_sriov_vf_ccs_init - Setup LRCA for save & restore. + * @xe: the &xe_device to start recovery on + * + * This function shall be called only by VF. It initializes + * LRCA and suballocator needed for CCS save & restore. + * + * Return: 0 on success. Negative error code on failure. 
+ */ +int xe_sriov_vf_ccs_init(struct xe_device *xe) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_tile_vf_ccs *ctx; + struct xe_exec_queue *q; + u32 flags; + int err; + + xe_assert(xe, IS_SRIOV_VF(xe)); + xe_assert(xe, !IS_DGFX(xe)); + xe_assert(xe, xe_device_has_flat_ccs(xe)); + + for_each_ccs_rw_ctx(ctx_id) { + ctx = &tile->sriov.vf.ccs[ctx_id]; + ctx->ctx_id = ctx_id; + + flags = EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT | + EXEC_QUEUE_FLAG_MIGRATE; + q = xe_exec_queue_create_bind(xe, tile, flags, 0); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto err_ret; + } + ctx->mig_q = q; + + err = alloc_bb_pool(tile, ctx); + if (err) + goto err_free_queue; + + ccs_rw_update_ring(ctx); + + err = register_save_restore_context(ctx); + if (err) + goto err_free_queue; + + err = devm_add_action_or_reset(xe->drm.dev, + xe_sriov_vf_ccs_fini, + ctx); + if (err) + goto err_ret; + } + + xe->sriov.vf.ccs.initialized = 1; + + return 0; + +err_free_queue: + xe_exec_queue_put(q); + +err_ret: + return err; +} + +/** + * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO. + * @bo: the &buffer object to which batch buffer commands will be added. + * + * This function shall be called only by VF. It inserts the PTEs and copy + * command instructions in the BO by calling xe_migrate_ccs_rw_copy() + * function. + * + * Returns: 0 if successful, negative error code on failure. + */ +int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_tile_vf_ccs *ctx; + struct xe_tile *tile; + struct xe_bb *bb; + int err = 0; + + if (!IS_VF_CCS_READY(xe)) + return 0; + + tile = xe_device_get_root_tile(xe); + + for_each_ccs_rw_ctx(ctx_id) { + bb = bo->bb_ccs[ctx_id]; + /* bb should be NULL here. Assert if not NULL */ + xe_assert(xe, !bb); + + ctx = &tile->sriov.vf.ccs[ctx_id]; + err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id); + } + return err; +} + +/** + * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO. + * @bo: the &buffer object from which batch buffer commands will be removed. + * + * This function shall be called only by VF. It removes the PTEs and copy + * command instructions from the BO. Make sure to update the BB with MI_NOOP + * before freeing. + * + * Returns: 0 if successful. 
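Illustrative pairing only (the real call sites live in the BO creation and teardown paths): a flat-CCS system-memory BO has its save/restore batches attached after creation and detached before it is freed.

static int example_bo_ccs_lifecycle(struct xe_bo *bo)
{
        int err;

        /* no-op unless VF CCS contexts are initialized and ready */
        err = xe_sriov_vf_ccs_attach_bo(bo);
        if (err)
                return err;

        /*
         * ... BO in use; on VF pause/resume the registered save/restore
         * contexts copy its CCS metadata out of and back into the CCS pool ...
         */

        return xe_sriov_vf_ccs_detach_bo(bo);
}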
+ */ +int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_bb *bb; + + if (!IS_VF_CCS_READY(xe)) + return 0; + + for_each_ccs_rw_ctx(ctx_id) { + bb = bo->bb_ccs[ctx_id]; + if (!bb) + continue; + + memset(bb->cs, MI_NOOP, bb->len * sizeof(u32)); + xe_bb_free(bb, NULL); + bo->bb_ccs[ctx_id] = NULL; + } + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h new file mode 100644 index 000000000000..1f1baf685fec --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_CCS_H_ +#define _XE_SRIOV_VF_CCS_H_ + +struct xe_device; +struct xe_bo; + +int xe_sriov_vf_ccs_init(struct xe_device *xe); +int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo); +int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo); +int xe_sriov_vf_ccs_register_context(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h new file mode 100644 index 000000000000..93435a6f4cb6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_CCS_TYPES_H_ +#define _XE_SRIOV_VF_CCS_TYPES_H_ + +#define for_each_ccs_rw_ctx(id__) \ + for ((id__) = 0; (id__) < XE_SRIOV_VF_CCS_CTX_COUNT; (id__)++) + +#define IS_VF_CCS_READY(xe) ({ \ + struct xe_device *___xe = (xe); \ + xe_assert(___xe, IS_SRIOV_VF(___xe)); \ + ___xe->sriov.vf.ccs.initialized; \ + }) + +#define IS_VF_CCS_INIT_NEEDED(xe) ({\ + struct xe_device *___xe = (xe); \ + IS_SRIOV_VF(___xe) && !IS_DGFX(___xe) && \ + xe_device_has_flat_ccs(___xe) && GRAPHICS_VER(___xe) >= 20; \ + }) + +enum xe_sriov_vf_ccs_rw_ctxs { + XE_SRIOV_VF_CCS_READ_CTX, + XE_SRIOV_VF_CCS_WRITE_CTX, + XE_SRIOV_VF_CCS_CTX_COUNT +}; + +#define IS_VF_CCS_BB_VALID(xe, bo) ({ \ + struct xe_device *___xe = (xe); \ + struct xe_bo *___bo = (bo); \ + IS_SRIOV_VF(___xe) && \ + ___bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && \ + ___bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; \ + }) + +struct xe_migrate; +struct xe_sa_manager; + +struct xe_tile_vf_ccs { + /** @id: Id to which context it belongs to */ + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + /** @mig_q: exec queues used for migration */ + struct xe_exec_queue *mig_q; + + struct { + /** @ccs_bb_pool: Pool from which batch buffers are allocated. 
*/ + struct xe_sa_manager *ccs_bb_pool; + } mem; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h index 8300416a6226..24a873c50c49 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -36,6 +36,12 @@ struct xe_device_vf { /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ unsigned long gt_flags; } migration; + + /** @ccs: VF CCS state data */ + struct { + /** @ccs.initialized: Initilalization of VF CCS is completed or not */ + bool initialized; + } ccs; }; #endif diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 41705f5d52e3..7999cc5262a5 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -22,15 +22,18 @@ #define MAX_SCRATCH_MMIO 8 /** - * DOC: Xe Boot Survivability + * DOC: Survivability Mode * - * Boot Survivability is a software based workflow for recovering a system in a failed boot state + * Survivability Mode is a software based workflow for recovering a system in a failed boot state * Here system recoverability is concerned with recovering the firmware responsible for boot. * - * This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware - * to be flashed through mei and collect telemetry. The driver's probe flow is modified - * such that it enters survivability mode when pcode initialization is incomplete and boot status - * denotes a failure. + * Boot Survivability + * =================== + * + * Boot Survivability is implemented by loading the driver with bare minimum (no drm card) to allow + * the firmware to be flashed through mei driver and collect telemetry. The driver's probe flow is + * modified such that it enters survivability mode when pcode initialization is incomplete and boot + * status denotes a failure. * * Survivability mode can also be entered manually using the survivability mode attribute available * through configfs which is beneficial in several usecases. It can be used to address scenarios @@ -41,12 +44,14 @@ * * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode * + * It is the responsibility of the user to clear the mode once firmware flash is complete. + * * Refer :ref:`xe_configfs` for more details on how to use configfs * * Survivability mode is indicated by the below admin-only readable sysfs which provides additional * debug information:: * - * /sys/bus/pci/devices/<device>/surivability_mode + * /sys/bus/pci/devices/<device>/survivability_mode * * Capability Information: * Provides boot status @@ -56,6 +61,22 @@ * Provides history of previous failures * Auxiliary Information * Certain failures may have information in addition to postcode information + * + * Runtime Survivability + * ===================== + * + * Certain runtime firmware errors can cause the device to enter a wedged state + * (:ref:`xe-device-wedging`) requiring a firmware flash to restore normal operation. + * Runtime Survivability Mode indicates that a firmware flash is necessary to recover the device and + * is indicated by the presence of survivability mode sysfs:: + * + * /sys/bus/pci/devices/<device>/survivability_mode + * + * Survivability mode sysfs provides information about the type of survivability mode. + * + * When such errors occur, userspace is notified with the drm device wedged uevent and runtime + * survivability mode. 
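Sketch of the user-space side described here: after the wedged uevent, a tool can read the sysfs file named above to learn the survivability type and the logged breadcrumbs. The PCI address is a placeholder and error handling is minimal; the file only exists while survivability mode is active.

#include <stdio.h>

static void dump_survivability_info(void)
{
        FILE *f = fopen("/sys/bus/pci/devices/0000:03:00.0/survivability_mode", "r");
        char line[128];

        if (!f)
                return;         /* device is not in survivability mode */
        while (fgets(line, sizeof(line), f))
                printf("%s", line);     /* type line plus scratch-register dump */
        fclose(f);
}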
User can then initiate a firmware flash using userspace tools like fwupd + * to restore device to normal operation. */ static u32 aux_history_offset(u32 reg_value) @@ -121,6 +142,14 @@ static void log_survivability_info(struct pci_dev *pdev) } } +static int check_boot_failure(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + + return survivability->boot_status == NON_CRITICAL_FAILURE || + survivability->boot_status == CRITICAL_FAILURE; +} + static ssize_t survivability_mode_show(struct device *dev, struct device_attribute *attr, char *buff) { @@ -130,6 +159,12 @@ static ssize_t survivability_mode_show(struct device *dev, struct xe_survivability_info *info = survivability->info; int index = 0, count = 0; + count += sysfs_emit_at(buff, count, "Survivability mode type: %s\n", + survivability->type ? "Runtime" : "Boot"); + + if (!check_boot_failure(xe)) + return count; + for (index = 0; index < MAX_SCRATCH_MMIO; index++) { if (info[index].reg) count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name, @@ -147,16 +182,14 @@ static void xe_survivability_mode_fini(void *arg) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct device *dev = &pdev->dev; - xe_configfs_clear_survivability_mode(pdev); sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); } -static int enable_survivability_mode(struct pci_dev *pdev) +static int create_survivability_sysfs(struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct xe_device *xe = pdev_to_xe_device(pdev); - struct xe_survivability *survivability = &xe->survivability; - int ret = 0; + int ret; /* create survivability mode sysfs */ ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr); @@ -170,6 +203,20 @@ static int enable_survivability_mode(struct pci_dev *pdev) if (ret) return ret; + return 0; +} + +static int enable_boot_survivability_mode(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + int ret = 0; + + ret = create_survivability_sysfs(pdev); + if (ret) + return ret; + /* Make sure xe_heci_gsc_init() knows about survivability mode */ survivability->mode = true; @@ -192,15 +239,36 @@ err: return ret; } +static int init_survivability_mode(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info; + + survivability->size = MAX_SCRATCH_MMIO; + + info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), + GFP_KERNEL); + if (!info) + return -ENOMEM; + + survivability->info = info; + + populate_survivability_info(xe); + + return 0; +} + /** - * xe_survivability_mode_is_enabled - check if survivability mode is enabled + * xe_survivability_mode_is_boot_enabled- check if boot survivability mode is enabled * @xe: xe device instance * - * Returns true if in survivability mode, false otherwise + * Returns true if in boot survivability mode of type, else false */ -bool xe_survivability_mode_is_enabled(struct xe_device *xe) +bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe) { - return xe->survivability.mode; + struct xe_survivability *survivability = &xe->survivability; + + return survivability->mode && survivability->type == XE_SURVIVABILITY_TYPE_BOOT; } /** @@ -241,44 +309,78 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe) data = xe_mmio_read32(mmio, PCODE_SCRATCH(0)); survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data); - return 
survivability->boot_status == NON_CRITICAL_FAILURE || - survivability->boot_status == CRITICAL_FAILURE; + return check_boot_failure(xe); } /** - * xe_survivability_mode_enable - Initialize and enable the survivability mode + * xe_survivability_mode_runtime_enable - Initialize and enable runtime survivability mode * @xe: xe device instance * - * Initialize survivability information and enable survivability mode + * Initialize survivability information and enable runtime survivability mode. + * Runtime survivability mode is enabled when certain errors cause the device to be + * in non-recoverable state. The device is declared wedged with the appropriate + * recovery method and survivability mode sysfs exposed to userspace * - * Return: 0 if survivability mode is enabled or not requested; negative error - * code otherwise. + * Return: 0 if runtime survivability mode is enabled, negative error code otherwise. */ -int xe_survivability_mode_enable(struct xe_device *xe) +int xe_survivability_mode_runtime_enable(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; - struct xe_survivability_info *info; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret; - if (!xe_survivability_mode_is_requested(xe)) - return 0; + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) { + dev_err(&pdev->dev, "Runtime Survivability Mode not supported\n"); + return -EINVAL; + } - survivability->size = MAX_SCRATCH_MMIO; + ret = init_survivability_mode(xe); + if (ret) + return ret; - info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), - GFP_KERNEL); - if (!info) - return -ENOMEM; + ret = create_survivability_sysfs(pdev); + if (ret) + dev_err(&pdev->dev, "Failed to create survivability mode sysfs\n"); - survivability->info = info; + survivability->type = XE_SURVIVABILITY_TYPE_RUNTIME; + dev_err(&pdev->dev, "Runtime Survivability mode enabled\n"); - populate_survivability_info(xe); + xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_VENDOR); + xe_device_declare_wedged(xe); + dev_err(&pdev->dev, "Firmware flash required, Please refer to the userspace documentation for more details!\n"); + + return 0; +} - /* Only log debug information and exit if it is a critical failure */ +/** + * xe_survivability_mode_boot_enable - Initialize and enable boot survivability mode + * @xe: xe device instance + * + * Initialize survivability information and enable boot survivability mode + * + * Return: 0 if boot survivability mode is enabled or not requested, negative error + * code otherwise. 
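Hypothetical probe-path usage, to show how the boot variant is meant to short-circuit normal driver load (the actual logic lives in the PCI probe code and may differ):

static int example_probe_tail(struct xe_device *xe)
{
        if (xe_survivability_mode_is_requested(xe)) {
                int err = xe_survivability_mode_boot_enable(xe);

                if (err)
                        return err;
                /* minimal "no drm card" mode: skip the rest of probe */
                return 0;
        }

        /* continue with full probe */
        return 0;
}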
+ */ +int xe_survivability_mode_boot_enable(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret; + + if (!xe_survivability_mode_is_requested(xe)) + return 0; + + ret = init_survivability_mode(xe); + if (ret) + return ret; + + /* Log breadcrumbs but do not enter survivability mode for Critical boot errors */ if (survivability->boot_status == CRITICAL_FAILURE) { log_survivability_info(pdev); return -ENXIO; } - return enable_survivability_mode(pdev); + survivability->type = XE_SURVIVABILITY_TYPE_BOOT; + + return enable_boot_survivability_mode(pdev); } diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h index 02231c2bf008..1cc94226aa82 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -10,8 +10,9 @@ struct xe_device; -int xe_survivability_mode_enable(struct xe_device *xe); -bool xe_survivability_mode_is_enabled(struct xe_device *xe); +int xe_survivability_mode_boot_enable(struct xe_device *xe); +int xe_survivability_mode_runtime_enable(struct xe_device *xe); +bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe); bool xe_survivability_mode_is_requested(struct xe_device *xe); #endif /* _XE_SURVIVABILITY_MODE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h index 19d433e253df..cd65a5d167c9 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h @@ -9,6 +9,11 @@ #include <linux/limits.h> #include <linux/types.h> +enum xe_survivability_type { + XE_SURVIVABILITY_TYPE_BOOT, + XE_SURVIVABILITY_TYPE_RUNTIME, +}; + struct xe_survivability_info { char name[NAME_MAX]; u32 reg; @@ -30,6 +35,9 @@ struct xe_survivability { /** @mode: boolean to indicate survivability mode */ bool mode; + + /** @type: survivability type */ + enum xe_survivability_type type; }; #endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index a7ff5975873f..76c6d74c1208 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -7,7 +7,6 @@ #include "xe_bo.h" #include "xe_gt_stats.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" #include "xe_pm.h" @@ -17,6 +16,7 @@ #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" +#include "xe_vram_types.h" static bool xe_svm_range_in_vram(struct xe_svm_range *range) { @@ -224,7 +224,7 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, xe_device_wmb(xe); - err = xe_vm_range_tilemask_tlb_invalidation(vm, adj_start, adj_end, tile_mask); + err = xe_vm_range_tilemask_tlb_inval(vm, adj_start, adj_end, tile_mask); WARN_ON_ONCE(err); range_notifier_event_end: @@ -252,10 +252,56 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm, return 0; } +static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end) +{ + struct xe_vma *vma; + struct xe_vma_mem_attr default_attr = { + .preferred_loc = { + .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, + .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, + }, + .atomic_access = DRM_XE_ATOMIC_UNDEFINED, + }; + int err = 0; + + vma = xe_vm_find_vma_by_addr(vm, range_start); + if (!vma) + return -EINVAL; + + if (xe_vma_has_default_mem_attrs(vma)) + return 0; + + vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx", + xe_vma_start(vma), 
xe_vma_end(vma)); + + if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) { + default_attr.pat_index = vma->attr.default_pat_index; + default_attr.default_pat_index = vma->attr.default_pat_index; + vma->attr = default_attr; + } else { + vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx", + range_start, range_end); + err = xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start); + if (err) { + drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err)); + xe_vm_kill(vm, true); + return err; + } + } + + /* + * On call from xe_svm_handle_pagefault original VMA might be changed + * signal this to lookup for VMA again. + */ + return -EAGAIN; +} + static int xe_svm_garbage_collector(struct xe_vm *vm) { struct xe_svm_range *range; - int err; + u64 range_start; + u64 range_end; + int err, ret = 0; lockdep_assert_held_write(&vm->lock); @@ -270,6 +316,9 @@ static int xe_svm_garbage_collector(struct xe_vm *vm) if (!range) break; + range_start = xe_svm_range_start(range); + range_end = xe_svm_range_end(range); + list_del(&range->garbage_collector_link); spin_unlock(&vm->svm.garbage_collector.lock); @@ -282,11 +331,19 @@ static int xe_svm_garbage_collector(struct xe_vm *vm) return err; } + err = xe_svm_range_set_default_attr(vm, range_start, range_end); + if (err) { + if (err == -EAGAIN) + ret = -EAGAIN; + else + return err; + } + spin_lock(&vm->svm.garbage_collector.lock); } spin_unlock(&vm->svm.garbage_collector.lock); - return 0; + return ret; } static void xe_svm_garbage_collector_work_func(struct work_struct *w) @@ -306,21 +363,15 @@ static struct xe_vram_region *page_to_vr(struct page *page) return container_of(page_pgmap(page), struct xe_vram_region, pagemap); } -static struct xe_tile *vr_to_tile(struct xe_vram_region *vr) -{ - return container_of(vr, struct xe_tile, mem.vram); -} - static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr, struct page *page) { u64 dpa; - struct xe_tile *tile = vr_to_tile(vr); u64 pfn = page_to_pfn(page); u64 offset; - xe_tile_assert(tile, is_device_private_page(page)); - xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base); + xe_assert(vr->xe, is_device_private_page(page)); + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base); offset = (pfn << PAGE_SHIFT) - vr->hpa_base; dpa = vr->dpa_base + offset; @@ -333,11 +384,12 @@ enum xe_svm_copy_dir { XE_SVM_COPY_TO_SRAM, }; -static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, +static int xe_svm_copy(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages, const enum xe_svm_copy_dir dir) { struct xe_vram_region *vr = NULL; - struct xe_tile *tile; + struct xe_device *xe; struct dma_fence *fence = NULL; unsigned long i; #define XE_VRAM_ADDR_INVALID ~0x0ull @@ -365,12 +417,12 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, last = (i + 1) == npages; /* No CPU page and no device pages queue'd to copy */ - if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID) + if (!pagemap_addr[i].addr && vram_addr == XE_VRAM_ADDR_INVALID) continue; if (!vr && spage) { vr = page_to_vr(spage); - tile = vr_to_tile(vr); + xe = vr->xe; } XE_WARN_ON(spage && page_to_vr(spage) != vr); @@ -379,7 +431,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, * first device page, check if physical contiguous on subsequent * device pages. 
*/ - if (dma_addr[i] && spage) { + if (pagemap_addr[i].addr && spage) { __vram_addr = xe_vram_region_page_to_dpa(vr, spage); if (vram_addr == XE_VRAM_ADDR_INVALID) { vram_addr = __vram_addr; @@ -387,6 +439,14 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, } match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr; + /* Expected with contiguous memory */ + xe_assert(vr->xe, match); + + if (pagemap_addr[i].order) { + i += NR_PAGES(pagemap_addr[i].order) - 1; + chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE); + last = (i + 1) == npages; + } } /* @@ -402,20 +462,22 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, if (vram_addr != XE_VRAM_ADDR_INVALID) { if (sram) { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", - vram_addr, (u64)dma_addr[pos], i - pos + incr); - __fence = xe_migrate_from_vram(tile->migrate, + vram_addr, + (u64)pagemap_addr[pos].addr, i - pos + incr); + __fence = xe_migrate_from_vram(vr->migrate, i - pos + incr, vram_addr, - dma_addr + pos); + &pagemap_addr[pos]); } else { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", - (u64)dma_addr[pos], vram_addr, i - pos + incr); - __fence = xe_migrate_to_vram(tile->migrate, + (u64)pagemap_addr[pos].addr, vram_addr, + i - pos + incr); + __fence = xe_migrate_to_vram(vr->migrate, i - pos + incr, - dma_addr + pos, + &pagemap_addr[pos], vram_addr); } if (IS_ERR(__fence)) { @@ -428,7 +490,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, } /* Setup physical address of next device page */ - if (dma_addr[i] && spage) { + if (pagemap_addr[i].addr && spage) { vram_addr = __vram_addr; pos = i; } else { @@ -438,18 +500,18 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, /* Extra mismatched device page, copy it */ if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) { if (sram) { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", - vram_addr, (u64)dma_addr[pos], 1); - __fence = xe_migrate_from_vram(tile->migrate, 1, + vram_addr, (u64)pagemap_addr[pos].addr, 1); + __fence = xe_migrate_from_vram(vr->migrate, 1, vram_addr, - dma_addr + pos); + &pagemap_addr[pos]); } else { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", - (u64)dma_addr[pos], vram_addr, 1); - __fence = xe_migrate_to_vram(tile->migrate, 1, - dma_addr + pos, + (u64)pagemap_addr[pos].addr, vram_addr, 1); + __fence = xe_migrate_to_vram(vr->migrate, 1, + &pagemap_addr[pos], vram_addr); } if (IS_ERR(__fence)) { @@ -475,16 +537,18 @@ err_out: #undef XE_VRAM_ADDR_INVALID } -static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr, +static int xe_svm_copy_to_devmem(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages) { - return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM); } -static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr, +static int xe_svm_copy_to_ram(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages) { - return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM); } static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) @@ -506,9 +570,9 @@ static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) return 
PHYS_PFN(offset + vr->hpa_base); } -static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) +static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram) { - return &tile->mem.vram.ttm.mm; + return &vram->ttm.mm; } static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, @@ -522,8 +586,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati list_for_each_entry(block, blocks, link) { struct xe_vram_region *vr = block->private; - struct xe_tile *tile = vr_to_tile(vr); - struct drm_buddy *buddy = tile_to_buddy(tile); + struct drm_buddy *buddy = vram_to_buddy(vr); u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block)); int i; @@ -683,20 +746,14 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *v } #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) -static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) -{ - return &tile->mem.vram; -} - static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, unsigned long start, unsigned long end, struct mm_struct *mm, unsigned long timeslice_ms) { - struct xe_tile *tile = container_of(dpagemap, typeof(*tile), mem.vram.dpagemap); - struct xe_device *xe = tile_to_xe(tile); + struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); + struct xe_device *xe = vr->xe; struct device *dev = xe->drm.dev; - struct xe_vram_region *vr = tile_to_vr(tile); struct drm_buddy_block *block; struct list_head *blocks; struct xe_bo *bo; @@ -709,9 +766,9 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, xe_pm_runtime_get(xe); retry: - bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, end - start, + bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start, ttm_bo_type_device, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | + (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | XE_BO_FLAG_CPU_ADDR_MIRROR); if (IS_ERR(bo)) { err = PTR_ERR(bo); @@ -721,9 +778,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, } drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, - &dpagemap_devmem_ops, - &tile->mem.vram.dpagemap, - end - start); + &dpagemap_devmem_ops, dpagemap, end - start); blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) @@ -790,22 +845,9 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm return true; } -/** - * xe_svm_handle_pagefault() - SVM handle page fault - * @vm: The VM. - * @vma: The CPU address mirror VMA. - * @gt: The gt upon the fault occurred. - * @fault_addr: The GPU fault address. - * @atomic: The fault atomic access bit. - * - * Create GPU bindings for a SVM page fault. Optionally migrate to device - * memory. - * - * Return: 0 on success, negative error code on error. - */ -int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_gt *gt, u64 fault_addr, - bool atomic) +static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, + struct xe_gt *gt, u64 fault_addr, + bool need_vram) { struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), @@ -813,14 +855,14 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .check_pages_threshold = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? 
SZ_64K : 0, - .devmem_only = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), - .timeslice_ms = atomic && IS_DGFX(vm->xe) && + .devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), + .timeslice_ms = need_vram && IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? vm->xe->atomic_svm_timeslice_ms : 0, }; struct xe_svm_range *range; struct dma_fence *fence; + struct drm_pagemap *dpagemap; struct xe_tile *tile = gt_to_tile(gt); int migrate_try_count = ctx.devmem_only ? 3 : 1; ktime_t end = 0; @@ -850,8 +892,14 @@ retry: range_debug(range, "PAGE FAULT"); + dpagemap = xe_vma_resolve_pagemap(vma, tile); if (--migrate_try_count >= 0 && - xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) { + xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { + /* TODO : For multi-device dpagemap will be used to find the + * remote tile and remote device. Will need to modify + * xe_svm_alloc_vram to use dpagemap for future multi-device + * support. + */ err = xe_svm_alloc_vram(tile, range, &ctx); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { @@ -919,6 +967,45 @@ err_out: } /** + * xe_svm_handle_pagefault() - SVM handle page fault + * @vm: The VM. + * @vma: The CPU address mirror VMA. + * @gt: The gt upon the fault occurred. + * @fault_addr: The GPU fault address. + * @atomic: The fault atomic access bit. + * + * Create GPU bindings for a SVM page fault. Optionally migrate to device + * memory. + * + * Return: 0 on success, negative error code on error. + */ +int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, + struct xe_gt *gt, u64 fault_addr, + bool atomic) +{ + int need_vram, ret; +retry: + need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); + if (need_vram < 0) + return need_vram; + + ret = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr, + need_vram ? true : false); + if (ret == -EAGAIN) { + /* + * Retry once on -EAGAIN to re-lookup the VMA, as the original VMA + * may have been split by xe_svm_range_set_default_attr. + */ + vma = xe_vm_find_vma_by_addr(vm, fault_addr); + if (!vma) + return -EINVAL; + + goto retry; + } + return ret; +} + +/** * xe_svm_has_mapping() - SVM has mappings * @vm: The VM. * @start: Start address. @@ -934,6 +1021,41 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) } /** + * xe_svm_unmap_address_range - UNMAP SVM mappings and ranges + * @vm: The VM + * @start: start addr + * @end: end addr + * + * This function UNMAPS svm ranges if start or end address are inside them. 
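xe_svm_unmap_address_range() expects the VM lock to be held for writing (the body asserts this with lockdep below). A hypothetical caller, shown only to illustrate the locking contract, with unmap_start/unmap_end as placeholder bounds:

	down_write(&vm->lock);
	xe_svm_unmap_address_range(vm, unmap_start, unmap_end);
	up_write(&vm->lock);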
+ */ +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpusvm_notifier *notifier, *next; + + lockdep_assert_held_write(&vm->lock); + + drm_gpusvm_for_each_notifier_safe(notifier, next, &vm->svm.gpusvm, start, end) { + struct drm_gpusvm_range *range, *__next; + + drm_gpusvm_for_each_range_safe(range, __next, notifier, start, end) { + if (start > drm_gpusvm_range_start(range) || + end < drm_gpusvm_range_end(range)) { + if (IS_DGFX(vm->xe) && xe_svm_range_in_vram(to_xe_range(range))) + drm_gpusvm_range_evict(&vm->svm.gpusvm, range); + drm_gpusvm_range_get(range); + __xe_svm_garbage_collector(vm, to_xe_range(range)); + if (!list_empty(&to_xe_range(range)->garbage_collector_link)) { + spin_lock(&vm->svm.garbage_collector.lock); + list_del(&to_xe_range(range)->garbage_collector_link); + spin_unlock(&vm->svm.garbage_collector.lock); + } + drm_gpusvm_range_put(range); + } + } + } +} + +/** * xe_svm_bo_evict() - SVM evict BO to system memory * @bo: BO to evict * @@ -997,8 +1119,94 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, return err; } +/** + * xe_svm_ranges_zap_ptes_in_range - clear ptes of svm ranges in input range + * @vm: Pointer to the xe_vm structure + * @start: Start of the input range + * @end: End of the input range + * + * This function removes the page table entries (PTEs) associated + * with the svm ranges within the given input start and end + * + * Return: tile_mask for which gt's need to be tlb invalidated. + */ +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpusvm_notifier *notifier; + struct xe_svm_range *range; + u64 adj_start, adj_end; + struct xe_tile *tile; + u8 tile_mask = 0; + u8 id; + + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && + lockdep_is_held_type(&vm->lock, 0)); + + drm_gpusvm_for_each_notifier(notifier, &vm->svm.gpusvm, start, end) { + struct drm_gpusvm_range *r = NULL; + + adj_start = max(start, drm_gpusvm_notifier_start(notifier)); + adj_end = min(end, drm_gpusvm_notifier_end(notifier)); + drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) { + range = to_xe_range(r); + for_each_tile(tile, vm->xe, id) { + if (xe_pt_zap_ptes_range(tile, vm, range)) { + tile_mask |= BIT(id); + /* + * WRITE_ONCE pairs with READ_ONCE in + * xe_vm_has_valid_gpu_mapping(). + * Must not fail after setting + * tile_invalidated and before + * TLB invalidation. + */ + WRITE_ONCE(range->tile_invalidated, + range->tile_invalidated | BIT(id)); + } + } + } + } + + return tile_mask; +} + #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile) +{ + return &tile->mem.vram->dpagemap; +} + +/** + * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA + * @vma: Pointer to the xe_vma structure containing memory attributes + * @tile: Pointer to the xe_tile structure used as fallback for VRAM mapping + * + * This function determines the correct DRM pagemap to use for a given VMA. + * It first checks if a valid devmem_fd is provided in the VMA's preferred + * location. If the devmem_fd is negative, it returns NULL, indicating no + * pagemap is available and smem to be used as preferred location. + * If the devmem_fd is equal to the default faulting + * GT identifier, it returns the VRAM pagemap associated with the tile. + * + * Future support for multi-device configurations may use drm_pagemap_from_fd() + * to resolve pagemaps from arbitrary file descriptors. 
+ * + * Return: A pointer to the resolved drm_pagemap, or NULL if none is applicable. + */ +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + s32 fd = (s32)vma->attr.preferred_loc.devmem_fd; + + if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM) + return NULL; + + if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE) + return IS_DGFX(tile_to_xe(tile)) ? tile_local_pagemap(tile) : NULL; + + /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */ + return NULL; +} + /** * xe_svm_alloc_vram()- Allocate device memory pages for range, * migrating existing data. @@ -1016,14 +1224,14 @@ int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); range_debug(range, "ALLOCATE VRAM"); - dpagemap = xe_tile_local_pagemap(tile); + dpagemap = tile_local_pagemap(tile); return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), xe_svm_range_end(range), range->base.gpusvm->mm, ctx->timeslice_ms); } -static struct drm_pagemap_device_addr +static struct drm_pagemap_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, struct device *dev, struct page *page, @@ -1042,7 +1250,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, prot = 0; } - return drm_pagemap_device_addr_encode(addr, prot, order, dir); + return drm_pagemap_addr_encode(addr, prot, order, dir); } static const struct drm_pagemap_ops xe_drm_pagemap_ops = { @@ -1111,6 +1319,11 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) { return 0; } + +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + return NULL; +} #endif /** diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index da9a69ea0bb1..9d6a8840a8b7 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -90,6 +90,12 @@ bool xe_svm_range_validate(struct xe_vm *vm, u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma); +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end); + +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end); + +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile); + /** * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping * @range: SVM range @@ -163,7 +169,7 @@ void xe_svm_flush(struct xe_vm *vm); #else #include <linux/interval_tree.h> -struct drm_pagemap_device_addr; +struct drm_pagemap_addr; struct drm_gpusvm_ctx; struct drm_gpusvm_range; struct xe_bo; @@ -178,7 +184,7 @@ struct xe_vram_region; struct xe_svm_range { struct { struct interval_tree_node itree; - const struct drm_pagemap_device_addr *dma_addr; + const struct drm_pagemap_addr *dma_addr; } base; u32 tile_present; u32 tile_invalidated; @@ -303,6 +309,23 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vm return ULONG_MAX; } +static inline +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end) +{ +} + +static inline +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end) +{ + return 0; +} + +static inline +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + return NULL; +} + #define xe_svm_assert_in_notifier(...) do {} while (0) #define xe_svm_range_has_dma_mapping(...) 
false diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index f87276df18f2..82872a51f098 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -77,6 +77,7 @@ static void user_fence_worker(struct work_struct *w) { struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); + WRITE_ONCE(ufence->signalled, 1); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) @@ -91,7 +92,6 @@ static void user_fence_worker(struct work_struct *w) * Wake up waiters only after updating the ufence state, allowing the UMD * to safely reuse the same ufence without encountering -EBUSY errors. */ - WRITE_ONCE(ufence->signalled, 1); wake_up_all(&ufence->xe->ufence_wq); user_fence_put(ufence); } diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 86e9811e60ba..d49ba3401963 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -7,6 +7,7 @@ #include <drm/drm_managed.h> +#include "xe_bo.h" #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" @@ -19,6 +20,8 @@ #include "xe_tile_sysfs.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" +#include "xe_vram.h" +#include "xe_vram_types.h" /** * DOC: Multi-tile Design @@ -92,6 +95,35 @@ static int xe_tile_alloc(struct xe_tile *tile) if (!tile->mem.ggtt) return -ENOMEM; + tile->migrate = xe_migrate_alloc(tile); + if (!tile->migrate) + return -ENOMEM; + + return 0; +} + +/** + * xe_tile_alloc_vram - Perform per-tile VRAM structs allocation + * @tile: Tile to perform allocations for + * + * Allocates VRAM per-tile data structures using DRM-managed allocations. + * Does not touch the hardware. + * + * Returns -ENOMEM if allocations fail, otherwise 0. + */ +int xe_tile_alloc_vram(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_vram_region *vram; + + if (!IS_DGFX(xe)) + return 0; + + vram = xe_vram_region_alloc(xe, tile->id, XE_PL_VRAM0 + tile->id); + if (!vram) + return -ENOMEM; + tile->mem.vram = vram; + return 0; } @@ -127,21 +159,6 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) } ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */ -static int tile_ttm_mgr_init(struct xe_tile *tile) -{ - struct xe_device *xe = tile_to_xe(tile); - int err; - - if (tile->mem.vram.usable_size) { - err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram.ttm); - if (err) - return err; - xe->info.mem_region_mask |= BIT(tile->id) << 1; - } - - return 0; -} - /** * xe_tile_init_noalloc - Init tile up to the point where allocations can happen. * @tile: The tile to initialize. 
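xe_tile_init_noalloc() below now dereferences tile->mem.vram, so on discrete parts the new xe_tile_alloc_vram() step has to run first. A rough ordering sketch, with the surrounding probe code elided and err/id as placeholders:

	for_each_tile(tile, xe, id) {
		err = xe_tile_alloc_vram(tile);		/* no-op unless IS_DGFX */
		if (err)
			return err;
	}

	/* ... remaining early init ... */

	for_each_tile(tile, xe, id) {
		err = xe_tile_init_noalloc(tile);	/* registers the TTM VRAM manager */
		if (err)
			return err;
	}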
@@ -159,16 +176,19 @@ static int tile_ttm_mgr_init(struct xe_tile *tile) int xe_tile_init_noalloc(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); - int err; - - err = tile_ttm_mgr_init(tile); - if (err) - return err; xe_wa_apply_tile_workarounds(tile); if (xe->info.has_usm && IS_DGFX(xe)) - xe_devm_add(tile, &tile->mem.vram); + xe_devm_add(tile, tile->mem.vram); + + if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) { + int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram); + + if (err) + return err; + xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1; + } return xe_tile_sysfs_init(tile); } diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index cc33e8733983..dceb6297aa01 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -14,19 +14,9 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); int xe_tile_init(struct xe_tile *tile); -void xe_tile_migrate_wait(struct xe_tile *tile); +int xe_tile_alloc_vram(struct xe_tile *tile); -#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) -static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) -{ - return &tile->mem.vram.dpagemap; -} -#else -static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) -{ - return NULL; -} -#endif +void xe_tile_migrate_wait(struct xe_tile *tile); static inline bool xe_tile_is_root(struct xe_tile *tile) { diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c new file mode 100644 index 000000000000..e6e97b5a7b5c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_abi.h" +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_tlb_inval.h" +#include "xe_gt_stats.h" +#include "xe_tlb_inval.h" +#include "xe_mmio.h" +#include "xe_pm.h" +#include "xe_tlb_inval.h" +#include "xe_trace.h" + +/** + * DOC: Xe TLB invalidation + * + * Xe TLB invalidation is implemented in two layers. The first is the frontend + * API, which provides an interface for TLB invalidations to the driver code. + * The frontend handles seqno assignment, synchronization (fences), and the + * timeout mechanism. The frontend is implemented via an embedded structure + * xe_tlb_inval that includes a set of ops hooking into the backend. The backend + * interacts with the hardware (or firmware) to perform the actual invalidation. 
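A rough sketch of what such a backend looks like from the frontend's point of view, assuming hypothetical my_*() helpers that talk to the hardware or firmware (the in-tree backend is the GuC one, wired up via xe_guc_tlb_inval_init_early(); the full xe_tlb_inval_ops layout is in xe_tlb_inval_types.h below):

static int my_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
	/*
	 * Kick off a GGTT invalidation tagged with @seqno; completion is
	 * reported back later through xe_tlb_inval_done_handler().
	 */
	return my_hw_send_ggtt_inval(tlb_inval->private, seqno);
}

static long my_timeout_delay(struct xe_tlb_inval *tlb_inval)
{
	return msecs_to_jiffies(500);	/* arbitrary value for the sketch */
}

static const struct xe_tlb_inval_ops my_tlb_inval_ops = {
	.ggtt		= my_inval_ggtt,
	.timeout_delay	= my_timeout_delay,
	/* .all, .ppgtt, .initialized and .flush omitted for brevity */
};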
+ */ + +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + +static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence) +{ + if (WARN_ON_ONCE(!fence->tlb_inval)) + return; + + xe_pm_runtime_put(fence->tlb_inval->xe); + fence->tlb_inval = NULL; /* fini() should be called once */ +} + +static void +xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + lockdep_assert_held(&fence->tlb_inval->pending_lock); + + list_del(&fence->link); + trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence); + xe_tlb_inval_fence_fini(fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence) +{ + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; + + spin_lock_irq(&tlb_inval->pending_lock); + xe_tlb_inval_fence_signal(fence); + spin_unlock_irq(&tlb_inval->pending_lock); +} + +static void xe_tlb_inval_fence_timeout(struct work_struct *work) +{ + struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval, + fence_tdr.work); + struct xe_device *xe = tlb_inval->xe; + struct xe_tlb_inval_fence *fence, *next; + long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval); + + tlb_inval->ops->flush(tlb_inval); + + spin_lock_irq(&tlb_inval->pending_lock); + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) { + s64 since_inval_ms = ktime_ms_delta(ktime_get(), + fence->inval_time); + + if (msecs_to_jiffies(since_inval_ms) < timeout_delay) + break; + + trace_xe_tlb_inval_fence_timeout(xe, fence); + drm_err(&xe->drm, + "TLB invalidation fence timeout, seqno=%d recv=%d", + fence->seqno, tlb_inval->seqno_recv); + + fence->base.error = -ETIME; + xe_tlb_inval_fence_signal(fence); + } + if (!list_empty(&tlb_inval->pending_fences)) + queue_delayed_work(system_wq, &tlb_inval->fence_tdr, + timeout_delay); + spin_unlock_irq(&tlb_inval->pending_lock); +} + +/** + * tlb_inval_fini - Clean up TLB invalidation state + * @drm: @drm_device + * @arg: pointer to struct @xe_tlb_inval + * + * Cancel pending fence workers and clean up any additional + * TLB invalidation state. + */ +static void tlb_inval_fini(struct drm_device *drm, void *arg) +{ + struct xe_tlb_inval *tlb_inval = arg; + + xe_tlb_inval_reset(tlb_inval); +} + +/** + * xe_gt_tlb_inval_init - Initialize TLB invalidation state + * @gt: GT structure + * + * Initialize TLB invalidation state, purely software initialization, should + * be called once during driver load. + * + * Return: 0 on success, negative error code on error. 
+ */ +int xe_gt_tlb_inval_init_early(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tlb_inval *tlb_inval = >->tlb_inval; + int err; + + tlb_inval->xe = xe; + tlb_inval->seqno = 1; + INIT_LIST_HEAD(&tlb_inval->pending_fences); + spin_lock_init(&tlb_inval->pending_lock); + spin_lock_init(&tlb_inval->lock); + INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout); + + err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock); + if (err) + return err; + + tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm, + "gt-tbl-inval-job-wq", + WQ_MEM_RECLAIM); + if (IS_ERR(tlb_inval->job_wq)) + return PTR_ERR(tlb_inval->job_wq); + + /* XXX: Blindly setting up backend to GuC */ + xe_guc_tlb_inval_init_early(>->uc.guc, tlb_inval); + + return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval); +} + +/** + * xe_tlb_inval_reset() - TLB invalidation reset + * @tlb_inval: TLB invalidation client + * + * Signal any pending invalidation fences, should be called during a GT reset + */ +void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval) +{ + struct xe_tlb_inval_fence *fence, *next; + int pending_seqno; + + /* + * we can get here before the backends are even initialized if we're + * wedging very early, in which case there are not going to be any + * pendind fences so we can bail immediately. + */ + if (!tlb_inval->ops->initialized(tlb_inval)) + return; + + /* + * Backend is already disabled at this point. No new TLB requests can + * appear. + */ + + mutex_lock(&tlb_inval->seqno_lock); + spin_lock_irq(&tlb_inval->pending_lock); + cancel_delayed_work(&tlb_inval->fence_tdr); + /* + * We might have various kworkers waiting for TLB flushes to complete + * which are not tracked with an explicit TLB fence, however at this + * stage that will never happen since the backend is already disabled, + * so make sure we signal them here under the assumption that we have + * completed a full GT reset. + */ + if (tlb_inval->seqno == 1) + pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; + else + pending_seqno = tlb_inval->seqno - 1; + WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno); + + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) + xe_tlb_inval_fence_signal(fence); + spin_unlock_irq(&tlb_inval->pending_lock); + mutex_unlock(&tlb_inval->seqno_lock); +} + +static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno) +{ + int seqno_recv = READ_ONCE(tlb_inval->seqno_recv); + + lockdep_assert_held(&tlb_inval->pending_lock); + + if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) + return false; + + if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) + return true; + + return seqno_recv >= seqno; +} + +static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence) +{ + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; + + fence->seqno = tlb_inval->seqno; + trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence); + + spin_lock_irq(&tlb_inval->pending_lock); + fence->inval_time = ktime_get(); + list_add_tail(&fence->link, &tlb_inval->pending_fences); + + if (list_is_singular(&tlb_inval->pending_fences)) + queue_delayed_work(system_wq, &tlb_inval->fence_tdr, + tlb_inval->ops->timeout_delay(tlb_inval)); + spin_unlock_irq(&tlb_inval->pending_lock); + + tlb_inval->seqno = (tlb_inval->seqno + 1) % + TLB_INVALIDATION_SEQNO_MAX; + if (!tlb_inval->seqno) + tlb_inval->seqno = 1; +} + +#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...) 
\ +({ \ + int __ret; \ + \ + xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops); \ + xe_assert((__tlb_inval)->xe, (__fence)); \ + \ + mutex_lock(&(__tlb_inval)->seqno_lock); \ + xe_tlb_inval_fence_prep((__fence)); \ + __ret = op((__tlb_inval), (__fence)->seqno, ##args); \ + if (__ret < 0) \ + xe_tlb_inval_fence_signal_unlocked((__fence)); \ + mutex_unlock(&(__tlb_inval)->seqno_lock); \ + \ + __ret == -ECANCELED ? 0 : __ret; \ +}) + +/** + * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs + * @tlb_inval: TLB invalidation client + * @fence: invalidation fence which will be signal on TLB invalidation + * completion + * + * Issue a TLB invalidation for all TLBs. Completion of TLB is asynchronous and + * caller can use the invalidation fence to wait for completion. + * + * Return: 0 on success, negative error code on error + */ +int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence) +{ + return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all); +} + +/** + * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT + * @tlb_inval: TLB invalidation client + * + * Issue a TLB invalidation for the GGTT. Completion of TLB is asynchronous and + * caller can use the invalidation fence to wait for completion. + * + * Return: 0 on success, negative error code on error + */ +int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval) +{ + struct xe_tlb_inval_fence fence, *fence_ptr = &fence; + int ret; + + xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true); + ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt); + xe_tlb_inval_fence_wait(fence_ptr); + + return ret; +} + +/** + * xe_tlb_inval_range() - Issue a TLB invalidation for an address range + * @tlb_inval: TLB invalidation client + * @fence: invalidation fence which will be signal on TLB invalidation + * completion + * @start: start address + * @end: end address + * @asid: address space id + * + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. + * + * Return: Negative error code on error, 0 on success + */ +int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, u64 start, u64 end, + u32 asid) +{ + return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt, + start, end, asid); +} + +/** + * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM + * @tlb_inval: TLB invalidation client + * @vm: VM to invalidate + * + * Invalidate entire VM's address space + */ +void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm) +{ + struct xe_tlb_inval_fence fence; + u64 range = 1ull << vm->xe->info.va_bits; + + xe_tlb_inval_fence_init(tlb_inval, &fence, true); + xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid); + xe_tlb_inval_fence_wait(&fence); +} + +/** + * xe_tlb_inval_done_handler() - TLB invalidation done handler + * @tlb_inval: TLB invalidation client + * @seqno: seqno of invalidation that is done + * + * Update recv seqno, signal any TLB invalidation fences, and restart TDR + */ +void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno) +{ + struct xe_device *xe = tlb_inval->xe; + struct xe_tlb_inval_fence *fence, *next; + unsigned long flags; + + /* + * This can also be run both directly from the IRQ handler and also in + * process_g2h_msg(). 
Only one may process any individual CT message, + * however the order they are processed here could result in skipping a + * seqno. To handle that we just process all the seqnos from the last + * seqno_recv up to and including the one in msg[0]. The delta should be + * very small so there shouldn't be much of pending_fences we actually + * need to iterate over here. + * + * From GuC POV we expect the seqnos to always appear in-order, so if we + * see something later in the timeline we can be sure that anything + * appearing earlier has already signalled, just that we have yet to + * officially process the CT message like if racing against + * process_g2h_msg(). + */ + spin_lock_irqsave(&tlb_inval->pending_lock, flags); + if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) { + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); + return; + } + + WRITE_ONCE(tlb_inval->seqno_recv, seqno); + + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) { + trace_xe_tlb_inval_fence_recv(xe, fence); + + if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno)) + break; + + xe_tlb_inval_fence_signal(fence); + } + + if (!list_empty(&tlb_inval->pending_fences)) + mod_delayed_work(system_wq, + &tlb_inval->fence_tdr, + tlb_inval->ops->timeout_delay(tlb_inval)); + else + cancel_delayed_work(&tlb_inval->fence_tdr); + + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); +} + +static const char * +xe_inval_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "tlb_inval_fence"; +} + +static const struct dma_fence_ops inval_fence_ops = { + .get_driver_name = xe_inval_fence_get_driver_name, + .get_timeline_name = xe_inval_fence_get_timeline_name, +}; + +/** + * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence + * @tlb_inval: TLB invalidation client + * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable + * + * Initialize TLB invalidation fence for use. xe_tlb_inval_fence_fini + * will be automatically called when fence is signalled (all fences must signal), + * even on error. 
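For a synchronous invalidation the fence can simply live on the stack, mirroring xe_tlb_inval_ggtt() and xe_tlb_inval_vm() above; a minimal sketch for a range invalidation, with start/end/asid as placeholders:

	struct xe_tlb_inval_fence fence;

	xe_tlb_inval_fence_init(tlb_inval, &fence, true);	/* stack fence */
	xe_tlb_inval_range(tlb_inval, &fence, start, end, asid);
	xe_tlb_inval_fence_wait(&fence);	/* non-interruptible */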
+ */ +void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + bool stack) +{ + xe_pm_runtime_get_noresume(tlb_inval->xe); + + spin_lock_irq(&tlb_inval->lock); + dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(&tlb_inval->lock); + INIT_LIST_HEAD(&fence->link); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); + fence->tlb_inval = tlb_inval; +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h new file mode 100644 index 000000000000..554634dfd4e2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_H_ +#define _XE_TLB_INVAL_H_ + +#include <linux/types.h> + +#include "xe_tlb_inval_types.h" + +struct xe_gt; +struct xe_guc; +struct xe_vm; + +int xe_gt_tlb_inval_init_early(struct xe_gt *gt); + +void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval); +int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence); +int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval); +void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm); +int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + u64 start, u64 end, u32 asid); + +void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + bool stack); + +/** + * xe_tlb_inval_fence_wait() - TLB invalidiation fence wait + * @fence: TLB invalidation fence to wait on + * + * Wait on a TLB invalidiation fence until it signals, non interruptable + */ +static inline void +xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} + +void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno); + +#endif /* _XE_TLB_INVAL_ */ diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c new file mode 100644 index 000000000000..492def04a559 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_dep_job_types.h" +#include "xe_dep_scheduler.h" +#include "xe_exec_queue.h" +#include "xe_gt_types.h" +#include "xe_tlb_inval.h" +#include "xe_tlb_inval_job.h" +#include "xe_migrate.h" +#include "xe_pm.h" + +/** struct xe_tlb_inval_job - TLB invalidation job */ +struct xe_tlb_inval_job { + /** @dep: base generic dependency Xe job */ + struct xe_dep_job dep; + /** @tlb_inval: TLB invalidation client */ + struct xe_tlb_inval *tlb_inval; + /** @q: exec queue issuing the invalidate */ + struct xe_exec_queue *q; + /** @refcount: ref count of this job */ + struct kref refcount; + /** + * @fence: dma fence to indicate completion. 1 way relationship - job + * can safely reference fence, fence cannot safely reference job. 
+ */ + struct dma_fence *fence; + /** @start: Start address to invalidate */ + u64 start; + /** @end: End address to invalidate */ + u64 end; + /** @asid: Address space ID to invalidate */ + u32 asid; + /** @fence_armed: Fence has been armed */ + bool fence_armed; +}; + +static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job) +{ + struct xe_tlb_inval_job *job = + container_of(dep_job, typeof(*job), dep); + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + + xe_tlb_inval_range(job->tlb_inval, ifence, job->start, + job->end, job->asid); + + return job->fence; +} + +static void xe_tlb_inval_job_free(struct xe_dep_job *dep_job) +{ + struct xe_tlb_inval_job *job = + container_of(dep_job, typeof(*job), dep); + + /* Pairs with get in xe_tlb_inval_job_push */ + xe_tlb_inval_job_put(job); +} + +static const struct xe_dep_job_ops dep_job_ops = { + .run_job = xe_tlb_inval_job_run, + .free_job = xe_tlb_inval_job_free, +}; + +/** + * xe_tlb_inval_job_create() - TLB invalidation job create + * @q: exec queue issuing the invalidate + * @tlb_inval: TLB invalidation client + * @dep_scheduler: Dependency scheduler for job + * @start: Start address to invalidate + * @end: End address to invalidate + * @asid: Address space ID to invalidate + * + * Create a TLB invalidation job and initialize internal fields. The caller is + * responsible for releasing the creation reference. + * + * Return: TLB invalidation job object on success, ERR_PTR failure + */ +struct xe_tlb_inval_job * +xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, + struct xe_dep_scheduler *dep_scheduler, u64 start, + u64 end, u32 asid) +{ + struct xe_tlb_inval_job *job; + struct drm_sched_entity *entity = + xe_dep_scheduler_entity(dep_scheduler); + struct xe_tlb_inval_fence *ifence; + int err; + + job = kmalloc(sizeof(*job), GFP_KERNEL); + if (!job) + return ERR_PTR(-ENOMEM); + + job->q = q; + job->tlb_inval = tlb_inval; + job->start = start; + job->end = end; + job->asid = asid; + job->fence_armed = false; + job->dep.ops = &dep_job_ops; + kref_init(&job->refcount); + xe_exec_queue_get(q); /* Pairs with put in xe_tlb_inval_job_destroy */ + + ifence = kmalloc(sizeof(*ifence), GFP_KERNEL); + if (!ifence) { + err = -ENOMEM; + goto err_job; + } + job->fence = &ifence->base; + + err = drm_sched_job_init(&job->dep.drm, entity, 1, NULL, + q->xef ? q->xef->drm->client_id : 0); + if (err) + goto err_fence; + + /* Pairs with put in xe_tlb_inval_job_destroy */ + xe_pm_runtime_get_noresume(gt_to_xe(q->gt)); + + return job; + +err_fence: + kfree(ifence); +err_job: + xe_exec_queue_put(q); + kfree(job); + + return ERR_PTR(err); +} + +static void xe_tlb_inval_job_destroy(struct kref *ref) +{ + struct xe_tlb_inval_job *job = container_of(ref, typeof(*job), + refcount); + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + struct xe_exec_queue *q = job->q; + struct xe_device *xe = gt_to_xe(q->gt); + + if (!job->fence_armed) + kfree(ifence); + else + /* Ref from xe_tlb_inval_fence_init */ + dma_fence_put(job->fence); + + drm_sched_job_cleanup(&job->dep.drm); + kfree(job); + xe_exec_queue_put(q); /* Pairs with get from xe_tlb_inval_job_create */ + xe_pm_runtime_put(xe); /* Pairs with get from xe_tlb_inval_job_create */ +} + +/** + * xe_tlb_inval_alloc_dep() - TLB invalidation job alloc dependency + * @job: TLB invalidation job to alloc dependency for + * + * Allocate storage for a dependency in the TLB invalidation fence. 
This + * function should be called at most once per job and must be paired with + * xe_tlb_inval_job_push being called with a real fence. + * + * Return: 0 on success, -errno on failure + */ +int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job) +{ + xe_assert(gt_to_xe(job->q->gt), !xa_load(&job->dep.drm.dependencies, 0)); + might_alloc(GFP_KERNEL); + + return drm_sched_job_add_dependency(&job->dep.drm, + dma_fence_get_stub()); +} + +/** + * xe_tlb_inval_job_push() - TLB invalidation job push + * @job: TLB invalidation job to push + * @m: The migration object being used + * @fence: Dependency for TLB invalidation job + * + * Pushes a TLB invalidation job for execution, using @fence as a dependency. + * Storage for @fence must be preallocated with xe_tlb_inval_job_alloc_dep + * prior to this call if @fence is not signaled. Takes a reference to the job’s + * finished fence, which the caller is responsible for releasing, and return it + * to the caller. This function is safe to be called in the path of reclaim. + * + * Return: Job's finished fence on success, cannot fail + */ +struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, + struct xe_migrate *m, + struct dma_fence *fence) +{ + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + + if (!dma_fence_is_signaled(fence)) { + void *ptr; + + /* + * Can be in path of reclaim, hence the preallocation of fence + * storage in xe_tlb_inval_job_alloc_dep. Verify caller did + * this correctly. + */ + xe_assert(gt_to_xe(job->q->gt), + xa_load(&job->dep.drm.dependencies, 0) == + dma_fence_get_stub()); + + dma_fence_get(fence); /* ref released once dependency processed by scheduler */ + ptr = xa_store(&job->dep.drm.dependencies, 0, fence, + GFP_ATOMIC); + xe_assert(gt_to_xe(job->q->gt), !xa_is_err(ptr)); + } + + xe_tlb_inval_job_get(job); /* Pairs with put in free_job */ + job->fence_armed = true; + + /* + * We need the migration lock to protect the job's seqno and the spsc + * queue, only taken on migration queue, user queues protected dma-resv + * VM lock. + */ + xe_migrate_job_lock(m, job->q); + + /* Creation ref pairs with put in xe_tlb_inval_job_destroy */ + xe_tlb_inval_fence_init(job->tlb_inval, ifence, false); + dma_fence_get(job->fence); /* Pairs with put in DRM scheduler */ + + drm_sched_job_arm(&job->dep.drm); + /* + * caller ref, get must be done before job push as it could immediately + * signal and free. + */ + dma_fence_get(&job->dep.drm.s_fence->finished); + drm_sched_entity_push_job(&job->dep.drm); + + xe_migrate_job_unlock(m, job->q); + + /* + * Not using job->fence, as it has its own dma-fence context, which does + * not allow TLB invalidation fences on the same queue, GT tuple to + * be squashed in dma-resv/DRM scheduler. Instead, we use the DRM scheduler + * context and job's finished fence, which enables squashing. + */ + return &job->dep.drm.s_fence->finished; +} + +/** + * xe_tlb_inval_job_get() - Get a reference to TLB invalidation job + * @job: TLB invalidation job object + * + * Increment the TLB invalidation job's reference count + */ +void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job) +{ + kref_get(&job->refcount); +} + +/** + * xe_tlb_inval_job_put() - Put a reference to TLB invalidation job + * @job: TLB invalidation job object + * + * Decrement the TLB invalidation job's reference count, call + * xe_tlb_inval_job_destroy when reference count == 0. Skips decrement if + * input @job is NULL or IS_ERR. 
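Putting the job API together, a typical lifecycle pairs the creation reference with a final put once the finished fence has been handed off. A hedged sketch, with q, tlb_inval, dep_scheduler, m, dep_fence and the address range all placeholders and error handling trimmed:

	job = xe_tlb_inval_job_create(q, tlb_inval, dep_scheduler,
				      start, end, asid);
	if (IS_ERR(job))
		return PTR_ERR(job);

	err = xe_tlb_inval_job_alloc_dep(job);	/* before entering reclaim */
	if (err) {
		xe_tlb_inval_job_put(job);
		return err;
	}

	fence = xe_tlb_inval_job_push(job, m, dep_fence);
	/* ... track @fence wherever completion matters ... */
	dma_fence_put(fence);		/* drop the caller's ref from push */
	xe_tlb_inval_job_put(job);	/* drop the creation reference */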
+ */ +void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job) +{ + if (!IS_ERR_OR_NULL(job)) + kref_put(&job->refcount, xe_tlb_inval_job_destroy); +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h new file mode 100644 index 000000000000..e63edcb26b50 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_JOB_H_ +#define _XE_TLB_INVAL_JOB_H_ + +#include <linux/types.h> + +struct dma_fence; +struct xe_dep_scheduler; +struct xe_exec_queue; +struct xe_tlb_inval; +struct xe_tlb_inval_job; +struct xe_migrate; + +struct xe_tlb_inval_job * +xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, + struct xe_dep_scheduler *dep_scheduler, + u64 start, u64 end, u32 asid); + +int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job); + +struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, + struct xe_migrate *m, + struct dma_fence *fence); + +void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job); + +void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h new file mode 100644 index 000000000000..8f8b060e9005 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_TYPES_H_ +#define _XE_TLB_INVAL_TYPES_H_ + +#include <linux/workqueue.h> +#include <linux/dma-fence.h> + +struct xe_tlb_inval; + +/** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */ +struct xe_tlb_inval_ops { + /** + * @all: Invalidate all TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int (*all)(struct xe_tlb_inval *tlb_inval, u32 seqno); + + /** + * @ggtt: Invalidate global translation TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int (*ggtt)(struct xe_tlb_inval *tlb_inval, u32 seqno); + + /** + * @ppgtt: Invalidate per-process translation TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * @start: Start address + * @end: End address + * @asid: Address space ID + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start, + u64 end, u32 asid); + + /** + * @initialized: Backend is initialized + * @tlb_inval: TLB invalidation client + * + * Return: True if back is initialized, False otherwise + */ + bool (*initialized)(struct xe_tlb_inval *tlb_inval); + + /** + * @flush: Flush pending TLB invalidations + * @tlb_inval: TLB invalidation client + */ + void (*flush)(struct xe_tlb_inval *tlb_inval); + + /** + * @timeout_delay: Timeout delay for TLB invalidation + * @tlb_inval: TLB invalidation client + * + * Return: Timeout delay for TLB invalidation in jiffies + */ + long (*timeout_delay)(struct xe_tlb_inval *tlb_inval); +}; + +/** struct xe_tlb_inval - TLB invalidation client (frontend) */ +struct xe_tlb_inval { + /** @private: Backend private pointer */ + void *private; + /** @xe: Pointer to Xe device */ + struct xe_device *xe; + /** @ops: TLB invalidation ops */ + const struct xe_tlb_inval_ops *ops; 
+ /** @tlb_inval.seqno: TLB invalidation seqno, protected by CT lock */ +#define TLB_INVALIDATION_SEQNO_MAX 0x100000 + int seqno; + /** @tlb_invalidation.seqno_lock: protects @tlb_invalidation.seqno */ + struct mutex seqno_lock; + /** + * @seqno_recv: last received TLB invalidation seqno, protected by + * CT lock + */ + int seqno_recv; + /** + * @pending_fences: list of pending fences waiting TLB invaliations, + * protected CT lock + */ + struct list_head pending_fences; + /** + * @pending_lock: protects @pending_fences and updating @seqno_recv. + */ + spinlock_t pending_lock; + /** + * @fence_tdr: schedules a delayed call to xe_tlb_fence_timeout after + * the timeout interval is over. + */ + struct delayed_work fence_tdr; + /** @job_wq: schedules TLB invalidation jobs */ + struct workqueue_struct *job_wq; + /** @tlb_inval.lock: protects TLB invalidation fences */ + spinlock_t lock; +}; + +/** + * struct xe_tlb_inval_fence - TLB invalidation fence + * + * Optionally passed to xe_tlb_inval* functions and will be signaled upon TLB + * invalidation completion. + */ +struct xe_tlb_inval_fence { + /** @base: dma fence base */ + struct dma_fence base; + /** @tlb_inval: TLB invalidation client which fence belong to */ + struct xe_tlb_inval *tlb_inval; + /** @link: link into list of pending tlb fences */ + struct list_head link; + /** @seqno: seqno of TLB invalidation to signal fence one */ + int seqno; + /** @inval_time: time of TLB invalidation */ + ktime_t inval_time; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index b4a3577df70c..314f42fcbcbd 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -14,10 +14,10 @@ #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" -#include "xe_gt_tlb_invalidation_types.h" #include "xe_gt_types.h" #include "xe_guc_exec_queue_types.h" #include "xe_sched_job.h" +#include "xe_tlb_inval_types.h" #include "xe_vm.h" #define __dev_name_xe(xe) dev_name((xe)->drm.dev) @@ -25,13 +25,13 @@ #define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt))) #define __dev_name_eq(q) __dev_name_gt((q)->gt) -DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DECLARE_EVENT_CLASS(xe_tlb_inval_fence, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence), TP_STRUCT__entry( __string(dev, __dev_name_xe(xe)) - __field(struct xe_gt_tlb_invalidation_fence *, fence) + __field(struct xe_tlb_inval_fence *, fence) __field(int, seqno) ), @@ -45,39 +45,23 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, __get_str(dev), __entry->fence, __entry->seqno) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_send, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, - xe_gt_tlb_invalidation_fence_work_func, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_recv, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_signal, + TP_PROTO(struct xe_device 
*xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_timeout, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d9c9d2547aad..dc588255674d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -25,6 +25,7 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" +#include "xe_vram.h" struct xe_ttm_stolen_mgr { struct xe_ttm_vram_mgr base; @@ -82,15 +83,16 @@ static u32 get_wopcm_size(struct xe_device *xe) static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { - struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram; + resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram); struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size, wopcm_size; u64 tile_offset; u64 tile_size; - tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start; - tile_size = tile->mem.vram.actual_physical_size; + tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram); + tile_size = xe_vram_region_actual_physical_size(tile_vram); /* Use DSM base address instead for stolen memory */ mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; @@ -107,7 +109,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) /* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) - mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; + mgr->io_base = tile_io_start + mgr->stolen_base; /* * There may be few KB of platform dependent reserved memory at the end @@ -164,7 +166,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr stolen_size -= wopcm_size; - if (media_gt && XE_WA(media_gt, 14019821291)) { + if (media_gt && XE_GT_WA(media_gt, 14019821291)) { u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE) & ~GENMASK_ULL(5, 0); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 9e375a40aee9..9175b4a2214b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -15,6 +15,7 @@ #include "xe_gt.h" #include "xe_res_cursor.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" static inline struct drm_buddy_block * xe_ttm_vram_mgr_first_block(struct list_head *list) @@ -337,13 +338,20 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); } 
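As a worked example of the rebasing in detect_bar2_dgfx() above, with illustrative numbers: if the root tile's VRAM I/O window starts at 0x400000000 and coincides with the device-level window (so tile_offset is 0), and DSMBASE masked with BDSM_MASK reads back 0x1F0000000, then stolen_base is 0x1F0000000 and, provided that fits inside LMEM_BAR, io_base becomes 0x400000000 + 0x1F0000000 = 0x5F0000000.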
-int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) +/** + * xe_ttm_vram_mgr_init - initialize TTM VRAM region + * @xe: pointer to Xe device + * @vram: pointer to xe_vram_region that contains the memory region attributes + * + * Initialize the Xe TTM for given @vram region using the given parameters. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram) { - struct xe_device *xe = tile_to_xe(tile); - struct xe_vram_region *vram = &tile->mem.vram; - - return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, - vram->usable_size, vram->io_size, + return __xe_ttm_vram_mgr_init(xe, &vram->ttm, vram->placement, + xe_vram_region_usable_size(vram), + xe_vram_region_io_size(vram), PAGE_SIZE); } @@ -392,7 +400,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, */ xe_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { - phys_addr_t phys = cursor.start + tile->mem.vram.io_start; + phys_addr_t phys = cursor.start + xe_vram_region_io_start(tile->mem.vram); size_t size = min_t(u64, cursor.size, SZ_2G); dma_addr_t addr; diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index cc76050e376d..87b7fae5edba 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -11,11 +11,12 @@ enum dma_data_direction; struct xe_device; struct xe_tile; +struct xe_vram_region; int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, u32 mem_type, u64 size, u64 io_size, u64 default_page_size); -int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr); +int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram); int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, struct ttm_resource *res, u64 offset, u64 length, diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 828b45b24c23..a524170a04d0 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -99,7 +99,7 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, XE_RTP_END_VERSION_UNDEFINED), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) }, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 86842247e9d8..c00a5ff31817 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -28,7 +28,6 @@ #include "xe_drm_client.h" #include "xe_exec_queue.h" #include "xe_gt_pagefault.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_pat.h" #include "xe_pm.h" @@ -38,6 +37,8 @@ #include "xe_res_cursor.h" #include "xe_svm.h" #include "xe_sync.h" +#include "xe_tile.h" +#include "xe_tlb_inval.h" #include "xe_trace_bo.h" #include "xe_wa.h" #include "xe_hmm.h" @@ -393,6 +394,9 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, &vm->rebind_list); + if (!try_wait_for_completion(&vm->xe->pm_block)) + return -EAGAIN; + ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); if (ret) return ret; @@ -479,6 +483,33 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return 
xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } +static bool vm_suspend_rebind_worker(struct xe_vm *vm) +{ + struct xe_device *xe = vm->xe; + bool ret = false; + + mutex_lock(&xe->rebind_resume_lock); + if (!try_wait_for_completion(&vm->xe->pm_block)) { + ret = true; + list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); + } + mutex_unlock(&xe->rebind_resume_lock); + + return ret; +} + +/** + * xe_vm_resume_rebind_worker() - Resume the rebind worker. + * @vm: The vm whose preempt worker to resume. + * + * Resume a preempt worker that was previously suspended by + * vm_suspend_rebind_worker(). + */ +void xe_vm_resume_rebind_worker(struct xe_vm *vm) +{ + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); +} + static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); @@ -502,6 +533,11 @@ static void preempt_rebind_work_func(struct work_struct *w) } retry: + if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { + up_write(&vm->lock); + return; + } + if (xe_vm_userptr_check_repin(vm)) { err = xe_vm_userptr_pin(vm); if (err) @@ -953,7 +989,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); @@ -1043,7 +1079,7 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); @@ -1126,7 +1162,7 @@ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_range_unbind(&vops, range); @@ -1168,7 +1204,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 start, u64 end, - u16 pat_index, unsigned int flags) + struct xe_vma_mem_attr *attr, + unsigned int flags) { struct xe_vma *vma; struct xe_tile *tile; @@ -1223,7 +1260,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (vm->xe->info.has_atomic_enable_pte_bit) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; - vma->pat_index = pat_index; + vma->attr = *attr; if (bo) { struct drm_gpuvm_bo *vm_bo; @@ -1512,14 +1549,39 @@ static u64 pte_encode_ps(u32 pt_level) return 0; } -static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, - const u16 pat_index) +static u16 pde_pat_index(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + u16 pat_index; + + /* + * We only have two bits to encode the PAT index in non-leaf nodes, but + * these only point to other paging structures so we only need a minimal + * selection of options. The user PAT index is only for encoding leaf + * nodes, where we have use of more bits to do the encoding. The + * non-leaf nodes are instead under driver control so the chosen index + * here should be distict from the user PAT index. 
Also the + * corresponding coherency of the PAT index should be tied to the + * allocation type of the page table (or at least we should pick + * something which is always safe). + */ + if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) + pat_index = xe->pat.idx[XE_CACHE_WB]; + else + pat_index = xe->pat.idx[XE_CACHE_NONE]; + + xe_assert(xe, pat_index <= 3); + + return pat_index; +} + +static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) { u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(pat_index); + pde |= pde_encode_pat_index(pde_pat_index(bo)); return pde; } @@ -1610,8 +1672,12 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); + if (IS_ERR(vm->scratch_pt[id][i])) { + int err = PTR_ERR(vm->scratch_pt[id][i]); + + vm->scratch_pt[id][i] = NULL; + return err; + } xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); } @@ -1640,7 +1706,7 @@ static void xe_vm_free_scratch(struct xe_vm *vm) } } -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; struct xe_vm *vm; @@ -1661,9 +1727,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->xe = xe; vm->size = 1ull << xe->info.va_bits; - vm->flags = flags; + if (xef) + vm->xef = xe_file_get(xef); /** * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be * manipulated under the PXP mutex. However, the PXP mutex can be taken @@ -1709,6 +1776,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (flags & XE_VM_FLAG_LR_MODE) { INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); xe_pm_runtime_get_noresume(xe); + INIT_LIST_HEAD(&vm->preempt.pm_activate_link); } if (flags & XE_VM_FLAG_FAULT_MODE) { @@ -1794,6 +1862,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); + if (xef && xe->info.has_asid) { + u32 asid; + + down_write(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(1, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + up_write(&xe->usm.lock); + if (err < 0) + goto err_unlock_close; + + vm->usm.asid = asid; + } + trace_xe_vm_create(vm); return vm; @@ -1814,6 +1896,8 @@ err_no_resv: for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); + if (vm->xef) + xe_file_put(vm->xef); kfree(vm); if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); @@ -1850,7 +1934,7 @@ static void xe_vm_close(struct xe_vm *vm) xe_pt_clear(xe, vm->pt_root[id]); for_each_gt(gt, xe, id) - xe_gt_tlb_invalidation_vm(gt, vm); + xe_tlb_inval_vm(>->tlb_inval, vm); } } @@ -1874,8 +1958,12 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_assert(xe, !vm->preempt.num_exec_queues); xe_vm_close(vm); - if (xe_vm_in_preempt_fence_mode(vm)) + if (xe_vm_in_preempt_fence_mode(vm)) { + mutex_lock(&xe->rebind_resume_lock); + list_del_init(&vm->preempt.pm_activate_link); + mutex_unlock(&xe->rebind_resume_lock); flush_work(&vm->preempt.rebind_work); + } if (xe_vm_in_fault_mode(vm)) xe_svm_close(vm); @@ -2024,8 +2112,7 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) u64 xe_vm_pdp4_descriptor(struct 
xe_vm *vm, struct xe_tile *tile) { - return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, - tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); + return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); } static struct xe_exec_queue * @@ -2059,16 +2146,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_create *args = data; - struct xe_tile *tile; struct xe_vm *vm; - u32 id, asid; + u32 id; int err; u32 flags = 0; if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; - if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) + if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929)) args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && @@ -2097,29 +2183,10 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; - vm = xe_vm_create(xe, flags); + vm = xe_vm_create(xe, flags, xef); if (IS_ERR(vm)) return PTR_ERR(vm); - if (xe->info.has_asid) { - down_write(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - up_write(&xe->usm.lock); - if (err < 0) - goto err_close_and_put; - - vm->usm.asid = asid; - } - - vm->xef = xe_file_get(xef); - - /* Record BO memory for VM pagetable created against client */ - for_each_tile(tile, xe, id) - if (vm->pt_root[id]) - xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); - #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - never enable by default */ args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); @@ -2169,6 +2236,108 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, return err; } +static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpuva *gpuva; + u32 num_vmas = 0; + + lockdep_assert_held(&vm->lock); + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) + num_vmas++; + + return num_vmas; +} + +static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, + u64 end, struct drm_xe_mem_range_attr *attrs) +{ + struct drm_gpuva *gpuva; + int i = 0; + + lockdep_assert_held(&vm->lock); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (i == *num_vmas) + return -ENOSPC; + + attrs[i].start = xe_vma_start(vma); + attrs[i].end = xe_vma_end(vma); + attrs[i].atomic.val = vma->attr.atomic_access; + attrs[i].pat_index.val = vma->attr.pat_index; + attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; + attrs[i].preferred_mem_loc.migration_policy = + vma->attr.preferred_loc.migration_policy; + + i++; + } + + *num_vmas = i; + return 0; +} + +int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_mem_range_attr *mem_attrs; + struct drm_xe_vm_query_mem_range_attr *args = data; + u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); + struct xe_vm *vm; + int err = 0; + + if (XE_IOCTL_DBG(xe, + ((args->num_mem_ranges == 0 && + (attrs_user || args->sizeof_mem_range_attr != 0)) || + (args->num_mem_ranges > 0 && + (!attrs_user || + args->sizeof_mem_range_attr != + sizeof(struct drm_xe_mem_range_attr)))))) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return 
-EINVAL; + + err = down_read_interruptible(&vm->lock); + if (err) + goto put_vm; + + attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); + + if (args->num_mem_ranges == 0 && !attrs_user) { + args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); + args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); + goto unlock_vm; + } + + mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (!mem_attrs) { + err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM; + goto unlock_vm; + } + + memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); + err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, + args->start + args->range, mem_attrs); + if (err) + goto free_mem_attrs; + + err = copy_to_user(attrs_user, mem_attrs, + args->sizeof_mem_range_attr * args->num_mem_ranges); + +free_mem_attrs: + kvfree(mem_attrs); +unlock_vm: + up_read(&vm->lock); +put_vm: + xe_vm_put(vm); + return err; +} + static bool vma_matches(struct xe_vma *vma, u64 page_addr) { if (page_addr > xe_vma_end(vma) - 1 || @@ -2374,9 +2543,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, __xe_vm_needs_clear_scratch_pages(vm, flags); } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + struct xe_tile *tile; struct xe_svm_range *svm_range; struct drm_gpusvm_ctx ctx = {}; - struct xe_tile *tile; + struct drm_pagemap *dpagemap; u8 id, tile_mask = 0; u32 i; @@ -2393,8 +2563,24 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, tile_mask |= 0x1 << id; xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); - op->prefetch_range.region = prefetch_region; op->prefetch_range.ranges_count = 0; + tile = NULL; + + if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { + dpagemap = xe_vma_resolve_pagemap(vma, + xe_device_get_root_tile(vm->xe)); + /* + * TODO: Once multigpu support is enabled will need + * something to dereference tile from dpagemap. + */ + if (dpagemap) + tile = xe_device_get_root_tile(vm->xe); + } else if (prefetch_region) { + tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - + XE_PL_VRAM0]; + } + + op->prefetch_range.tile = tile; alloc_next_range: svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); @@ -2413,7 +2599,7 @@ alloc_next_range: goto unwind_prefetch_ops; } - if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) { + if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); goto check_next_range; } @@ -2450,7 +2636,7 @@ unwind_prefetch_ops: ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - u16 pat_index, unsigned int flags) + struct xe_vma_mem_attr *attr, unsigned int flags) { struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; struct drm_exec exec; @@ -2479,7 +2665,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, } vma = xe_vma_create(vm, bo, op->gem.offset, op->va.addr, op->va.addr + - op->va.range - 1, pat_index, flags); + op->va.range - 1, attr, flags); if (IS_ERR(vma)) goto err_unlock; @@ -2596,6 +2782,29 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) return err; } +/** + * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes + * @vma: Pointer to the xe_vma structure to check + * + * This function determines whether the given VMA (Virtual Memory Area) + * has its memory attributes set to their default values. Specifically, + * it checks the following conditions: + * + * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` + * - `pat_index` is equal to `default_pat_index` + * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` + * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` + * + * Return: true if all attributes are at their default values, false otherwise. + */ +bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) +{ + return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && + vma->attr.pat_index == vma->attr.default_pat_index && + vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && + vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); +} + static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, struct xe_vma_ops *vops) { @@ -2622,6 +2831,16 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { + struct xe_vma_mem_attr default_attr = { + .preferred_loc = { + .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, + .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, + }, + .atomic_access = DRM_XE_ATOMIC_UNDEFINED, + .default_pat_index = op->map.pat_index, + .pat_index = op->map.pat_index, + }; + flags |= op->map.read_only ? VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? @@ -2631,7 +2850,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, flags |= op->map.is_cpu_addr_mirror ? 
VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - vma = new_vma(vm, &op->base.map, op->map.pat_index, + vma = new_vma(vm, &op->base.map, &default_attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2659,8 +2878,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, end = op->base.remap.next->va.addr; if (xe_vma_is_cpu_addr_mirror(old) && - xe_svm_has_mapping(vm, start, end)) - return -EBUSY; + xe_svm_has_mapping(vm, start, end)) { + if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) + xe_svm_unmap_address_range(vm, start, end); + else + return -EBUSY; + } op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); @@ -2679,7 +2902,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, if (op->base.remap.prev) { vma = new_vma(vm, op->base.remap.prev, - old->pat_index, flags); + &old->attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2709,7 +2932,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, if (op->base.remap.next) { vma = new_vma(vm, op->base.remap.next, - old->pat_index, flags); + &old->attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2896,30 +3119,26 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) { bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + struct xe_tile *tile = op->prefetch_range.tile; int err = 0; struct xe_svm_range *svm_range; struct drm_gpusvm_ctx ctx = {}; - struct xe_tile *tile; unsigned long i; - u32 region; if (!xe_vma_is_cpu_addr_mirror(vma)) return 0; - region = op->prefetch_range.region; - ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = devmem_possible; ctx.check_pages_threshold = devmem_possible ? 
SZ_64K : 0; /* TODO: Threading the migration */ xa_for_each(&op->prefetch_range.range, i, svm_range) { - if (!region) + if (!tile) xe_svm_range_migrate_to_smem(vm, svm_range); - if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { - tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { err = xe_svm_alloc_vram(tile, svm_range, &ctx); if (err) { drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", @@ -2982,12 +3201,11 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); u32 region; - if (xe_vma_is_cpu_addr_mirror(vma)) - region = op->prefetch_range.region; - else + if (!xe_vma_is_cpu_addr_mirror(vma)) { region = op->prefetch.region; - - xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || + region <= ARRAY_SIZE(region_to_mem_type)); + } err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), @@ -3405,8 +3623,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, prefetch_region && op != DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & - xe->info.mem_region_mask)) || + XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && + !(BIT(prefetch_region) & xe->info.mem_region_mask))) || XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_UNMAP)) { err = -EINVAL; @@ -3428,6 +3646,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, free_bind_ops: if (args->num_binds > 1) kvfree(*bind_ops); + *bind_ops = NULL; return err; } @@ -3534,7 +3753,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_exec_queue *q = NULL; u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; - struct drm_xe_vm_bind_op *bind_ops; + struct drm_xe_vm_bind_op *bind_ops = NULL; struct xe_vma_ops vops; struct dma_fence *fence; int err; @@ -3552,7 +3771,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) q = xe_exec_queue_lookup(xef, args->exec_queue_id); if (XE_IOCTL_DBG(xe, !q)) { err = -ENOENT; - goto put_vm; + goto free_bind_ops; } if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { @@ -3598,7 +3817,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!ops) { err = -ENOMEM; - goto release_vm_lock; + goto free_bos; } } @@ -3732,17 +3951,20 @@ free_syncs: put_obj: for (i = 0; i < args->num_binds; ++i) xe_bo_put(bos[i]); + + kvfree(ops); +free_bos: + kvfree(bos); release_vm_lock: up_write(&vm->lock); put_exec_queue: if (q) xe_exec_queue_put(q); -put_vm: - xe_vm_put(vm); - kvfree(bos); - kvfree(ops); +free_bind_ops: if (args->num_binds > 1) kvfree(bind_ops); +put_vm: + xe_vm_put(vm); return err; } @@ -3850,7 +4072,7 @@ void xe_vm_unlock(struct xe_vm *vm) } /** - * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an + * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an * address range * @vm: The VM * @start: start address @@ -3861,10 +4083,11 @@ void xe_vm_unlock(struct xe_vm *vm) * * Returns 0 for success, negative error code otherwise. 
*/ -int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, - u64 end, u8 tile_mask) +int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask) { - struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + struct xe_tlb_inval_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; struct xe_tile *tile; u32 fence_id = 0; u8 id; @@ -3874,39 +4097,36 @@ int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, return 0; for_each_tile(tile, vm->xe, id) { - if (tile_mask & BIT(id)) { - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], true); - - err = xe_gt_tlb_invalidation_range(tile->primary_gt, - &fence[fence_id], - start, - end, - vm->usm.asid); - if (err) - goto wait; - ++fence_id; + if (!(tile_mask & BIT(id))) + continue; - if (!tile->media_gt) - continue; + xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, + &fence[fence_id], true); - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], true); + err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, + &fence[fence_id], start, end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; - err = xe_gt_tlb_invalidation_range(tile->media_gt, - &fence[fence_id], - start, - end, - vm->usm.asid); - if (err) - goto wait; - ++fence_id; - } + if (!tile->media_gt) + continue; + + xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, + &fence[fence_id], true); + + err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, + &fence[fence_id], start, end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; } wait: for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + xe_tlb_inval_fence_wait(&fence[id]); return err; } @@ -3965,8 +4185,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) xe_device_wmb(xe); - ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma), - xe_vma_end(vma), tile_mask); + ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), + xe_vma_end(vma), tile_mask); /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); @@ -4168,3 +4388,223 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) } kvfree(snap); } + +/** + * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations + * @xe: Pointer to the XE device structure + * @vma: Pointer to the virtual memory area (VMA) structure + * @is_atomic: In pagefault path and atomic operation + * + * This function determines whether the given VMA needs to be migrated to + * VRAM in order to do atomic GPU operation. + * + * Return: + * 1 - Migration to VRAM is required + * 0 - Migration is not required + * -EACCES - Invalid access for atomic memory attr + * + */ +int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) +{ + u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : + vma->attr.atomic_access; + + if (!IS_DGFX(xe) || !is_atomic) + return false; + + /* + * NOTE: The checks implemented here are platform-specific. For + * instance, on a device supporting CXL atomics, these would ideally + * work universally without additional handling. 
+ */ + switch (atomic_access) { + case DRM_XE_ATOMIC_DEVICE: + return !xe->info.has_device_atomics_on_smem; + + case DRM_XE_ATOMIC_CPU: + return -EACCES; + + case DRM_XE_ATOMIC_UNDEFINED: + case DRM_XE_ATOMIC_GLOBAL: + default: + return 1; + } +} + +static int xe_vm_alloc_vma(struct xe_vm *vm, + struct drm_gpuvm_map_req *map_req, + bool is_madvise) +{ + struct xe_vma_ops vops; + struct drm_gpuva_ops *ops = NULL; + struct drm_gpuva_op *__op; + bool is_cpu_addr_mirror = false; + bool remap_op = false; + struct xe_vma_mem_attr tmp_attr; + u16 default_pat; + int err; + + lockdep_assert_held_write(&vm->lock); + + if (is_madvise) + ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req); + else + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req); + + if (IS_ERR(ops)) + return PTR_ERR(ops); + + if (list_empty(&ops->list)) { + err = 0; + goto free_ops; + } + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + struct xe_vma *vma = NULL; + + if (!is_madvise) { + if (__op->op == DRM_GPUVA_OP_UNMAP) { + vma = gpuva_to_vma(op->base.unmap.va); + XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); + default_pat = vma->attr.default_pat_index; + } + + if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + default_pat = vma->attr.default_pat_index; + } + + if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.is_cpu_addr_mirror = true; + op->map.pat_index = default_pat; + } + } else { + if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + xe_assert(vm->xe, !remap_op); + xe_assert(vm->xe, xe_vma_has_no_bo(vma)); + remap_op = true; + + if (xe_vma_is_cpu_addr_mirror(vma)) + is_cpu_addr_mirror = true; + else + is_cpu_addr_mirror = false; + } + + if (__op->op == DRM_GPUVA_OP_MAP) { + xe_assert(vm->xe, remap_op); + remap_op = false; + /* + * In case of madvise ops DRM_GPUVA_OP_MAP is + * always after DRM_GPUVA_OP_REMAP, so ensure + * we assign op->map.is_cpu_addr_mirror true + * if REMAP is for xe_vma_is_cpu_addr_mirror vma + */ + op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; + } + } + print_op(vm->xe, __op); + } + + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + + if (is_madvise) + vops.flags |= XE_VMA_OPS_FLAG_MADVISE; + + err = vm_bind_ioctl_ops_parse(vm, ops, &vops); + if (err) + goto unwind_ops; + + xe_vm_lock(vm, false); + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + struct xe_vma *vma; + + if (__op->op == DRM_GPUVA_OP_UNMAP) { + vma = gpuva_to_vma(op->base.unmap.va); + /* There should be no unmap for madvise */ + if (is_madvise) + XE_WARN_ON("UNEXPECTED UNMAP"); + + xe_vma_destroy(vma, NULL); + } else if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + /* In case of madvise ops, store attributes for the REMAP-unmapped + * VMA, so they can be assigned to the newly created MAP vma. + */ + if (is_madvise) + tmp_attr = vma->attr; + + xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); + } else if (__op->op == DRM_GPUVA_OP_MAP) { + vma = op->map.vma; + /* In case of a madvise call, MAP will always follow REMAP. + * Therefore tmp_attr will always have sane values, making it safe to + * copy them to the new vma.
+ */ + if (is_madvise) + vma->attr = tmp_attr; + } + } + + xe_vm_unlock(vm); + drm_gpuva_ops_free(&vm->gpuvm, ops); + return 0; + +unwind_ops: + vm_bind_ioctl_ops_unwind(vm, &ops, 1); +free_ops: + drm_gpuva_ops_free(&vm->gpuvm, ops); + return err; +} + +/** + * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops + * @vm: Pointer to the xe_vm structure + * @start: Starting input address + * @range: Size of the input range + * + * This function splits existing vma to create new vma for user provided input range + * + * Return: 0 if success + */ +int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) +{ + struct drm_gpuvm_map_req map_req = { + .map.va.addr = start, + .map.va.range = range, + }; + + lockdep_assert_held_write(&vm->lock); + + vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); + + return xe_vm_alloc_vma(vm, &map_req, true); +} + +/** + * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma + * @vm: Pointer to the xe_vm structure + * @start: Starting input address + * @range: Size of the input range + * + * This function splits/merges existing vma to create new vma for user provided input range + * + * Return: 0 if success + */ +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) +{ + struct drm_gpuvm_map_req map_req = { + .map.va.addr = start, + .map.va.range = range, + }; + + lockdep_assert_held_write(&vm->lock); + + vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", + start, range); + + return xe_vm_alloc_vma(vm, &map_req, false); +} diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 3475a118f666..d631c4b25c51 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -26,7 +26,7 @@ struct xe_sync_entry; struct xe_svm_range; struct drm_exec; -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef); struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); @@ -66,6 +66,8 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm) struct xe_vma * xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range); +bool xe_vma_has_default_mem_attrs(struct xe_vma *vma); + /** * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs * @vm: The vm @@ -171,6 +173,12 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr); +int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic); + +int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size); + +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t addr, uint64_t size); + /** * to_userptr_vma() - Return a pointer to an embedding userptr vma * @vma: Pointer to the embedded struct xe_vma @@ -191,7 +199,7 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file); - +int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file); void xe_vm_close_and_put(struct xe_vm *vm); static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) @@ -228,8 +236,8 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, struct xe_svm_range *range); -int 
xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, - u64 end, u8 tile_mask); +int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask); int xe_vm_invalidate_vma(struct xe_vma *vma); @@ -273,6 +281,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, struct xe_exec_queue *q, u64 addr, enum xe_cache_level cache_lvl); +void xe_vm_resume_rebind_worker(struct xe_vm *vm); + /** * xe_vm_resv() - Return's the vm's reservation object * @vm: The vm @@ -315,22 +325,14 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. - * - * Return: A pin cookie that should be used for xe_vm_clear_validating(). */ -static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, - bool allow_res_evict) +static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict) { - struct pin_cookie cookie = {}; - if (vm && !allow_res_evict) { xe_vm_assert_held(vm); - cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); /* Pairs with READ_ONCE in xe_vm_is_validating() */ WRITE_ONCE(vm->validating, current); } - - return cookie; } /** @@ -338,17 +340,14 @@ static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, * @vm: Pointer to the vm or NULL * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same * value as for xe_vm_set_validation(). - * @cookie: Cookie obtained from xe_vm_set_validating(). * * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. */ -static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, - struct pin_cookie cookie) +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict) { if (vm && !allow_res_evict) { - lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); /* Pairs with READ_ONCE in xe_vm_is_validating() */ WRITE_ONCE(vm->validating, NULL); } diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c new file mode 100644 index 000000000000..09c5783ee523 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_vm_madvise.h" + +#include <linux/nospec.h> +#include <drm/xe_drm.h> + +#include "xe_bo.h" +#include "xe_pat.h" +#include "xe_pt.h" +#include "xe_svm.h" + +struct xe_vmas_in_madvise_range { + u64 addr; + u64 range; + struct xe_vma **vmas; + int num_vmas; + bool has_svm_vmas; + bool has_bo_vmas; + bool has_userptr_vmas; +}; + +static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range) +{ + u64 addr = madvise_range->addr; + u64 range = madvise_range->range; + + struct xe_vma **__vmas; + struct drm_gpuva *gpuva; + int max_vmas = 8; + + lockdep_assert_held(&vm->lock); + + madvise_range->num_vmas = 0; + madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL); + if (!madvise_range->vmas) + return -ENOMEM; + + vm_dbg(&vm->xe->drm, "VMA's in range: start=0x%016llx, end=0x%016llx", addr, addr + range); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (xe_vma_bo(vma)) + madvise_range->has_bo_vmas = true; + else if (xe_vma_is_cpu_addr_mirror(vma)) + 
madvise_range->has_svm_vmas = true; + else if (xe_vma_is_userptr(vma)) + madvise_range->has_userptr_vmas = true; + + if (madvise_range->num_vmas == max_vmas) { + max_vmas <<= 1; + __vmas = krealloc(madvise_range->vmas, + max_vmas * sizeof(*madvise_range->vmas), + GFP_KERNEL); + if (!__vmas) { + kfree(madvise_range->vmas); + return -ENOMEM; + } + madvise_range->vmas = __vmas; + } + + madvise_range->vmas[madvise_range->num_vmas] = vma; + (madvise_range->num_vmas)++; + } + + if (!madvise_range->num_vmas) + kfree(madvise_range->vmas); + + vm_dbg(&vm->xe->drm, "madvise_range-num_vmas = %d\n", madvise_range->num_vmas); + + return 0; +} + +static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC); + + for (i = 0; i < num_vmas; i++) { + /*TODO: Extend attributes to bo based vmas */ + if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd && + vmas[i]->attr.preferred_loc.migration_policy == + op->preferred_mem_loc.migration_policy) || + !xe_vma_is_cpu_addr_mirror(vmas[i])) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd; + /* Till multi-device support is not added migration_policy + * is of no use and can be ignored. + */ + vmas[i]->attr.preferred_loc.migration_policy = + op->preferred_mem_loc.migration_policy; + } + } +} + +static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + struct xe_bo *bo; + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC); + xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU); + + for (i = 0; i < num_vmas; i++) { + if (xe_vma_is_userptr(vmas[i]) && + !(op->atomic.val == DRM_XE_ATOMIC_DEVICE && + xe->info.has_device_atomics_on_smem)) { + vmas[i]->skip_invalidation = true; + continue; + } + + if (vmas[i]->attr.atomic_access == op->atomic.val) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.atomic_access = op->atomic.val; + } + + vmas[i]->attr.atomic_access = op->atomic.val; + + bo = xe_vma_bo(vmas[i]); + if (!bo || bo->attr.atomic_access == op->atomic.val) + continue; + + vmas[i]->skip_invalidation = false; + xe_bo_assert_held(bo); + bo->attr.atomic_access = op->atomic.val; + + /* Invalidate cpu page table, so bo can migrate to smem in next access */ + if (xe_bo_is_vram(bo) && + (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU || + bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL)) + ttm_bo_unmap_virtual(&bo->ttm); + } +} + +static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT); + + for (i = 0; i < num_vmas; i++) { + if (vmas[i]->attr.pat_index == op->pat_index.val) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.pat_index = op->pat_index.val; + } + } +} + +typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op); + +static const madvise_func madvise_funcs[] = { + [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc, + [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic, + [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index, +}; 
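The madvise_funcs[] table above maps each DRM_XE_MEM_RANGE_ATTR_* value to its handler; xe_vm_madvise_ioctl() further below rejects out-of-range types in madvise_args_are_sane() and clamps speculation with array_index_nospec() before indexing the table. Below is a minimal standalone sketch of that table-dispatch pattern; the handler names and argument types are illustrative placeholders, not the driver's.

#include <stdio.h>

typedef void (*madvise_fn)(int val);

static void handle_preferred_loc(int val) { printf("preferred_loc: %d\n", val); }
static void handle_atomic(int val)        { printf("atomic: %d\n", val); }
static void handle_pat(int val)           { printf("pat: %d\n", val); }

/* Indices 0/1/2 stand in for DRM_XE_MEM_RANGE_ATTR_{PREFERRED_LOC,ATOMIC,PAT}. */
static const madvise_fn table[] = {
	handle_preferred_loc,
	handle_atomic,
	handle_pat,
};

static int dispatch(unsigned int type, int val)
{
	/* The driver rejects unknown types in madvise_args_are_sane() and also
	 * clamps speculation with array_index_nospec() before the table lookup;
	 * a plain bounds check stands in for both here.
	 */
	if (type >= sizeof(table) / sizeof(table[0]))
		return -1;
	table[type](val);
	return 0;
}

int main(void)
{
	dispatch(1, 3);			/* prints "atomic: 3" */
	return dispatch(7, 0) ? 1 : 0;	/* out of range -> rejected */
}

The real handlers additionally take the device, the VM, and the VMA array collected by get_vmas(), as shown in the ioctl implementation that follows.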
+ +static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpuva *gpuva; + struct xe_tile *tile; + u8 id, tile_mask = 0; + + lockdep_assert_held_write(&vm->lock); + + /* Wait for pending binds */ + if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT) <= 0) + XE_WARN_ON(1); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (vma->skip_invalidation || xe_vma_is_null(vma)) + continue; + + if (xe_vma_is_cpu_addr_mirror(vma)) { + tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm, + xe_vma_start(vma), + xe_vma_end(vma)); + } else { + for_each_tile(tile, vm->xe, id) { + if (xe_pt_zap_ptes(tile, vma)) { + tile_mask |= BIT(id); + + /* + * WRITE_ONCE pairs with READ_ONCE + * in xe_vm_has_valid_gpu_mapping() + */ + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated | BIT(id)); + } + } + } + } + + return tile_mask; +} + +static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end) +{ + u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end); + + if (!tile_mask) + return 0; + + xe_device_wmb(vm->xe); + + return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask); +} + +static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args) +{ + if (XE_IOCTL_DBG(xe, !args)) + return false; + + if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K))) + return false; + + if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K))) + return false; + + if (XE_IOCTL_DBG(xe, args->range < SZ_4K)) + return false; + + switch (args->type) { + case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: + { + s32 fd = (s32)args->preferred_mem_loc.devmem_fd; + + if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > + DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.reserved)) + return false; + break; + } + case DRM_XE_MEM_RANGE_ATTR_ATOMIC: + if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.reserved)) + return false; + + break; + case DRM_XE_MEM_RANGE_ATTR_PAT: + { + u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val); + + if (XE_IOCTL_DBG(xe, !coh_mode)) + return false; + + if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) + return false; + + if (XE_IOCTL_DBG(xe, args->pat_index.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->pat_index.reserved)) + return false; + break; + } + default: + if (XE_IOCTL_DBG(xe, 1)) + return false; + } + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return false; + + return true; +} + +static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, + int num_vmas, u32 atomic_val) +{ + struct xe_device *xe = vm->xe; + struct xe_bo *bo; + int i; + + for (i = 0; i < num_vmas; i++) { + bo = xe_vma_bo(vmas[i]); + if (!bo) + continue; + /* + * NOTE: The following atomic checks are platform-specific. For example, + * if a device supports CXL atomics, these may not be necessary or + * may behave differently. 
+ */ + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU && + !(bo->flags & XE_BO_FLAG_SYSTEM))) + return false; + + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE && + !(bo->flags & XE_BO_FLAG_VRAM0) && + !(bo->flags & XE_BO_FLAG_VRAM1) && + !(bo->flags & XE_BO_FLAG_SYSTEM && + xe->info.has_device_atomics_on_smem))) + return false; + + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL && + (!(bo->flags & XE_BO_FLAG_SYSTEM) || + (!(bo->flags & XE_BO_FLAG_VRAM0) && + !(bo->flags & XE_BO_FLAG_VRAM1))))) + return false; + } + return true; +} +/** + * xe_vm_madvise_ioctl - Handle MADVise ioctl for a VM + * @dev: DRM device pointer + * @data: Pointer to ioctl data (drm_xe_madvise*) + * @file: DRM file pointer + * + * Handles the MADVISE ioctl to provide memory advice for vma's within + * input range. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_madvise *args = data; + struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, + .range = args->range, }; + struct xe_vm *vm; + struct drm_exec exec; + int err, attr_type; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -EINVAL; + + if (!madvise_args_are_sane(vm->xe, args)) { + err = -EINVAL; + goto put_vm; + } + + xe_svm_flush(vm); + + err = down_write_killable(&vm->lock); + if (err) + goto put_vm; + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + err = -ENOENT; + goto unlock_vm; + } + + err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + if (err) + goto unlock_vm; + + err = get_vmas(vm, &madvise_range); + if (err || !madvise_range.num_vmas) + goto unlock_vm; + + if (madvise_range.has_bo_vmas) { + if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { + if (!check_bo_args_are_sane(vm, madvise_range.vmas, + madvise_range.num_vmas, + args->atomic.val)) { + err = -EINVAL; + goto unlock_vm; + } + } + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + for (int i = 0; i < madvise_range.num_vmas; i++) { + struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]); + + if (!bo) + continue; + err = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (err) + goto err_fini; + } + } + } + + if (madvise_range.has_userptr_vmas) { + err = down_read_interruptible(&vm->userptr.notifier_lock); + if (err) + goto err_fini; + } + + if (madvise_range.has_svm_vmas) { + err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock); + if (err) + goto unlock_userptr; + } + + attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args); + + err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); + + if (madvise_range.has_svm_vmas) + xe_svm_notifier_unlock(vm); + +unlock_userptr: + if (madvise_range.has_userptr_vmas) + up_read(&vm->userptr.notifier_lock); +err_fini: + if (madvise_range.has_bo_vmas) + drm_exec_fini(&exec); + kfree(madvise_range.vmas); + madvise_range.vmas = NULL; +unlock_vm: + up_write(&vm->lock); +put_vm: + xe_vm_put(vm); + return err; +} diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h new file mode 100644 index 000000000000..b0e1fc445f23 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -0,0 +1,15 @@ +/* 
SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_VM_MADVISE_H_ +#define _XE_VM_MADVISE_H_ + +struct drm_device; +struct drm_file; + +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 8a07feef503b..e1a786db5f89 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -77,6 +77,44 @@ struct xe_userptr { #endif }; +/** + * struct xe_vma_mem_attr - memory attributes associated with vma + */ +struct xe_vma_mem_attr { + /** @preferred_loc: preferred memory location */ + struct { + /** @preferred_loc.migration_policy: Pages migration policy */ + u32 migration_policy; + + /** + * @preferred_loc.devmem_fd: used for determining pagemap_fd + * requested by user. DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM and + * DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE mean system memory or + * closest device memory respectively. + */ + u32 devmem_fd; + } preferred_loc; + + /** + * @atomic_access: The atomic access type for the vma + * See %DRM_XE_ATOMIC_UNDEFINED, %DRM_XE_ATOMIC_DEVICE, + * %DRM_XE_ATOMIC_GLOBAL, and %DRM_XE_ATOMIC_CPU for possible + * values. These are defined in uapi/drm/xe_drm.h. + */ + u32 atomic_access; + + /** + * @default_pat_index: The pat index for the VMA, set by the user during the first bind. + */ + u16 default_pat_index; + + /** + * @pat_index: The pat index to use when encoding the PTEs for this vma. + * Same as default_pat_index unless overwritten by madvise. + */ + u16 pat_index; +}; + struct xe_vma { /** @gpuva: Base GPUVA object */ struct drm_gpuva gpuva; @@ -126,15 +164,22 @@ struct xe_vma { u8 tile_staged; /** - * @pat_index: The pat index to use when encoding the PTEs for this vma. + * @skip_invalidation: Used in madvise to avoid invalidation + * if mem attributes don't change */ - u16 pat_index; + bool skip_invalidation; /** * @ufence: The user fence that was provided with MAP. * Needs to be signalled before UNMAP can be processed. */ struct xe_user_fence *ufence; + + /** + * @attr: The attributes of the vma, which determine the migration policy + * and encoding of the PTEs for this vma. + */ + struct xe_vma_mem_attr attr; }; /** @@ -293,6 +338,11 @@ struct xe_vm { * BOs */ struct work_struct rebind_work; + /** + * @preempt.pm_activate_link: Link to list of rebind workers to be + * kicked on resume. + */ + struct list_head pm_activate_link; } preempt; /** @um: unified memory state */ @@ -395,8 +445,11 @@ struct xe_vma_op_prefetch_range { struct xarray range; /** @ranges_count: number of svm ranges to map */ u32 ranges_count; - /** @region: memory region to prefetch to */ - u32 region; + /** + * @tile: Pointer to the tile structure containing memory to prefetch.
+ * NULL if prefetch requested region is smem + */ + struct xe_tile *tile; }; /** enum xe_vma_op_flags - flags for VMA operation */ @@ -462,6 +515,7 @@ struct xe_vma_ops { struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE]; /** @flag: signify the properties within xe_vma_ops*/ #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) +#define XE_VMA_OPS_FLAG_MADVISE BIT(1) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index e421a74fb87c..b44ebf50fedb 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -3,6 +3,7 @@ * Copyright © 2021-2024 Intel Corporation */ +#include <kunit/visibility.h> #include <linux/pci.h> #include <drm/drm_managed.h> @@ -19,7 +20,9 @@ #include "xe_mmio.h" #include "xe_module.h" #include "xe_sriov.h" +#include "xe_ttm_vram_mgr.h" #include "xe_vram.h" +#include "xe_vram_types.h" #define BAR_SIZE_SHIFT 20 @@ -136,7 +139,7 @@ static bool resource_is_valid(struct pci_dev *pdev, int bar) return true; } -static int determine_lmem_bar_size(struct xe_device *xe) +static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *lmem_bar) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -147,16 +150,16 @@ static int determine_lmem_bar_size(struct xe_device *xe) resize_vram_bar(xe); - xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR); - xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR); - if (!xe->mem.vram.io_size) + lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); + lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); + if (!lmem_bar->io_size) return -EIO; /* XXX: Need to change when xe link code is ready */ - xe->mem.vram.dpa_base = 0; + lmem_bar->dpa_base = 0; /* set up a map to the total memory area. 
*/ - xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); + lmem_bar->mapping = devm_ioremap_wc(&pdev->dev, lmem_bar->io_start, lmem_bar->io_size); return 0; } @@ -278,13 +281,71 @@ static void vram_fini(void *arg) struct xe_tile *tile; int id; - if (xe->mem.vram.mapping) - iounmap(xe->mem.vram.mapping); - - xe->mem.vram.mapping = NULL; + xe->mem.vram->mapping = NULL; for_each_tile(tile, xe, id) - tile->mem.vram.mapping = NULL; + tile->mem.vram->mapping = NULL; +} + +struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement) +{ + struct xe_vram_region *vram; + struct drm_device *drm = &xe->drm; + + xe_assert(xe, id < xe->info.tile_count); + + vram = drmm_kzalloc(drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return NULL; + + vram->xe = xe; + vram->id = id; + vram->placement = placement; +#if defined(CONFIG_DRM_XE_PAGEMAP) + vram->migrate = xe->tiles[id].migrate; +#endif + return vram; +} + +static void print_vram_region_info(struct xe_device *xe, struct xe_vram_region *vram) +{ + struct drm_device *drm = &xe->drm; + + if (vram->io_size < vram->usable_size) + drm_info(drm, "Small BAR device\n"); + + drm_info(drm, + "VRAM[%u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", + vram->id, &vram->actual_physical_size, &vram->usable_size, &vram->io_size); + drm_info(drm, "VRAM[%u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", + vram->id, &vram->dpa_base, vram->dpa_base + (u64)vram->actual_physical_size, + &vram->io_start, vram->io_start + (u64)vram->io_size); +} + +static int vram_region_init(struct xe_device *xe, struct xe_vram_region *vram, + struct xe_vram_region *lmem_bar, u64 offset, u64 usable_size, + u64 region_size, resource_size_t remain_io_size) +{ + /* Check if VRAM region is already initialized */ + if (vram->mapping) + return 0; + + vram->actual_physical_size = region_size; + vram->io_start = lmem_bar->io_start + offset; + vram->io_size = min_t(u64, usable_size, remain_io_size); + + if (!vram->io_size) { + drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); + return -ENODEV; + } + + vram->dpa_base = lmem_bar->dpa_base + offset; + vram->mapping = lmem_bar->mapping + offset; + vram->usable_size = usable_size; + + print_vram_region_info(xe, vram); + + return 0; } /** @@ -298,78 +359,108 @@ static void vram_fini(void *arg) int xe_vram_probe(struct xe_device *xe) { struct xe_tile *tile; - resource_size_t io_size; + struct xe_vram_region lmem_bar; + resource_size_t remain_io_size; u64 available_size = 0; u64 total_size = 0; - u64 tile_offset; - u64 tile_size; - u64 vram_size; int err; u8 id; if (!IS_DGFX(xe)) return 0; - /* Get the size of the root tile's vram for later accessibility comparison */ - tile = xe_device_get_root_tile(xe); - err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + err = determine_lmem_bar_size(xe, &lmem_bar); if (err) return err; + drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &lmem_bar.io_start, &lmem_bar.io_size); - err = determine_lmem_bar_size(xe); - if (err) - return err; - - drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &xe->mem.vram.io_size); - - io_size = xe->mem.vram.io_size; + remain_io_size = lmem_bar.io_size; - /* tile specific ranges */ for_each_tile(tile, xe, id) { - err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + u64 region_size; + u64 usable_size; + u64 tile_offset; + + err = tile_vram_size(tile, &usable_size, ®ion_size, &tile_offset); if (err) return err; - tile->mem.vram.actual_physical_size = tile_size; - tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; - tile->mem.vram.io_size = min_t(u64, vram_size, io_size); + total_size += region_size; + available_size += usable_size; + + err = vram_region_init(xe, tile->mem.vram, &lmem_bar, tile_offset, usable_size, + region_size, remain_io_size); + if (err) + return err; - if (!tile->mem.vram.io_size) { - drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); - return -ENODEV; + if (total_size > lmem_bar.io_size) { + drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", + &total_size, &lmem_bar.io_size); } - tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; - tile->mem.vram.usable_size = vram_size; - tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; + remain_io_size -= min_t(u64, tile->mem.vram->actual_physical_size, remain_io_size); + } - if (tile->mem.vram.io_size < tile->mem.vram.usable_size) - drm_info(&xe->drm, "Small BAR device\n"); - drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, - tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); - drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, - &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, - &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); + err = vram_region_init(xe, xe->mem.vram, &lmem_bar, 0, available_size, total_size, + lmem_bar.io_size); + if (err) + return err; - /* calculate total size using tile size to get the correct HW sizing */ - total_size += tile_size; - available_size += vram_size; + return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); +} - if (total_size > xe->mem.vram.io_size) { - drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", - &total_size, &xe->mem.vram.io_size); - } +/** + * xe_vram_region_io_start - Get the IO start of a VRAM region + * @vram: the VRAM region + * + * Return: the IO start of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram) +{ + return vram ? vram->io_start : 0; +} - io_size -= min_t(u64, tile_size, io_size); - } +/** + * xe_vram_region_io_size - Get the IO size of a VRAM region + * @vram: the VRAM region + * + * Return: the IO size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram) +{ + return vram ? vram->io_size : 0; +} - xe->mem.vram.actual_physical_size = total_size; +/** + * xe_vram_region_dpa_base - Get the DPA base of a VRAM region + * @vram: the VRAM region + * + * Return: the DPA base of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram) +{ + return vram ? vram->dpa_base : 0; +} - drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &xe->mem.vram.actual_physical_size); - drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &available_size); +/** + * xe_vram_region_usable_size - Get the usable size of a VRAM region + * @vram: the VRAM region + * + * Return: the usable size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram) +{ + return vram ? vram->usable_size : 0; +} - return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); +/** + * xe_vram_region_actual_physical_size - Get the actual physical size of a VRAM region + * @vram: the VRAM region + * + * Return: the actual physical size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram) +{ + return vram ? 
+EXPORT_SYMBOL_IF_KUNIT(xe_vram_region_actual_physical_size);
diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h
index e31cc04ec0db..72860f714fc6 100644
--- a/drivers/gpu/drm/xe/xe_vram.h
+++ b/drivers/gpu/drm/xe/xe_vram.h
@@ -6,8 +6,19 @@
 #ifndef _XE_VRAM_H_
 #define _XE_VRAM_H_
 
+#include <linux/types.h>
+
 struct xe_device;
+struct xe_vram_region;
 
 int xe_vram_probe(struct xe_device *xe);
 
+struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement);
+
+resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram);
+resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram);
+resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram);
+resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram);
+resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c
index b26e26d73dae..17bc84da4cdc 100644
--- a/drivers/gpu/drm/xe/xe_vram_freq.c
+++ b/drivers/gpu/drm/xe/xe_vram_freq.c
@@ -34,7 +34,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
 	struct xe_tile *tile = dev_to_tile(dev);
-	u32 val, mbox;
+	u32 val = 0, mbox;
 	int err;
 
 	mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG)
@@ -56,7 +56,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
 	struct xe_tile *tile = dev_to_tile(dev);
-	u32 val, mbox;
+	u32 val = 0, mbox;
 	int err;
 
 	mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG)
diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h
new file mode 100644
index 000000000000..83772dcbf1af
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vram_types.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_VRAM_TYPES_H_
+#define _XE_VRAM_TYPES_H_
+
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+#include <drm/drm_pagemap.h>
+#endif
+
+#include "xe_ttm_vram_mgr_types.h"
+
+struct xe_device;
+struct xe_migrate;
+
+/**
+ * struct xe_vram_region - memory region structure
+ * This is used to describe a memory region in xe
+ * device, such as HBM memory or CXL extension memory.
+ */
+struct xe_vram_region {
+	/** @xe: Back pointer to xe device */
+	struct xe_device *xe;
+	/**
+	 * @id: VRAM region instance id
+	 *
+	 * The value should be unique for VRAM region.
+	 */
+	u8 id;
+	/** @io_start: IO start address of this VRAM instance */
+	resource_size_t io_start;
+	/**
+	 * @io_size: IO size of this VRAM instance
+	 *
+	 * This represents how much of this VRAM we can access
+	 * via the CPU through the VRAM BAR. This can be smaller
+	 * than @usable_size, in which case only part of VRAM is CPU
+	 * accessible (typically the first 256M). This
+	 * configuration is known as small-bar.
+	 */
+	resource_size_t io_size;
+	/** @dpa_base: This memory regions's DPA (device physical address) base */
+	resource_size_t dpa_base;
+	/**
+	 * @usable_size: usable size of VRAM
+	 *
+	 * Usable size of VRAM excluding reserved portions
+	 * (e.g stolen mem)
+	 */
+	resource_size_t usable_size;
+	/**
+	 * @actual_physical_size: Actual VRAM size
+	 *
+	 * Actual VRAM size including reserved portions
+	 * (e.g stolen mem)
+	 */
+	resource_size_t actual_physical_size;
+	/** @mapping: pointer to VRAM mappable space */
+	void __iomem *mapping;
+	/** @ttm: VRAM TTM manager */
+	struct xe_ttm_vram_mgr ttm;
+	/** @placement: TTM placement dedicated for this region */
+	u32 placement;
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+	/** @migrate: Back pointer to migrate */
+	struct xe_migrate *migrate;
+	/** @pagemap: Used to remap device memory as ZONE_DEVICE */
+	struct dev_pagemap pagemap;
+	/**
+	 * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
+	 * pages of this tile.
+	 */
+	struct drm_pagemap dpagemap;
+	/**
+	 * @hpa_base: base host physical address
+	 *
+	 * This is generated when remap device memory as ZONE_DEVICE
+	 */
+	resource_size_t hpa_base;
+#endif
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 22a98600fd8f..52c7df4c3afd 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -538,6 +538,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
 	},
+	{ XE_RTP_NAME("13012615864"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
+	},
 
 	/* Xe2_HPG */
 
@@ -602,6 +607,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT))
 	},
+	{ XE_RTP_NAME("13012615864"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
+	},
 
 	/* Xe2_LPM */
 
@@ -647,7 +657,8 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE))
 	},
 	{ XE_RTP_NAME("13012615864"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), OR,
+		       GRAPHICS_VERSION(3003),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
 	},
@@ -905,13 +916,13 @@ void xe_wa_process_device_oob(struct xe_device *xe)
 }
 
 /**
- * xe_wa_process_oob - process OOB workaround table
+ * xe_wa_process_gt_oob - process GT OOB workaround table
  * @gt: GT instance to process workarounds for
  *
  * Process OOB workaround table for this platform, marking in @gt the
 * workarounds that are active.
  */
-void xe_wa_process_oob(struct xe_gt *gt)
+void xe_wa_process_gt_oob(struct xe_gt *gt)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
 
@@ -995,12 +1006,12 @@ int xe_wa_device_init(struct xe_device *xe)
 }
 
 /**
- * xe_wa_init - initialize gt with workaround bookkeeping
+ * xe_wa_gt_init - initialize gt with workaround bookkeeping
 * @gt: GT instance to initialize
 *
 * Returns 0 for success, negative error code otherwise.
 */
-int xe_wa_init(struct xe_gt *gt)
+int xe_wa_gt_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	size_t n_oob, n_lrc, n_engine, n_gt, total;
@@ -1026,7 +1037,7 @@ int xe_wa_init(struct xe_gt *gt)
 
 	return 0;
 }
-ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */
+ALLOW_ERROR_INJECTION(xe_wa_gt_init, ERRNO); /* See xe_pci_probe() */
 
 void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p)
 {
@@ -1079,6 +1090,6 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
 	if (IS_SRIOV_VF(tile->xe))
 		return;
 
-	if (XE_WA(tile->primary_gt, 22010954014))
+	if (XE_GT_WA(tile->primary_gt, 22010954014))
 		xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
 }
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index f3880c65cb8d..6a869b2de643 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -14,9 +14,9 @@ struct xe_hw_engine;
 struct xe_tile;
 
 int xe_wa_device_init(struct xe_device *xe);
-int xe_wa_init(struct xe_gt *gt);
+int xe_wa_gt_init(struct xe_gt *gt);
 void xe_wa_process_device_oob(struct xe_device *xe);
-void xe_wa_process_oob(struct xe_gt *gt);
+void xe_wa_process_gt_oob(struct xe_gt *gt);
 void xe_wa_process_gt(struct xe_gt *gt);
 void xe_wa_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_process_lrc(struct xe_hw_engine *hwe);
@@ -25,11 +25,11 @@ void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p);
 void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
 
 /**
- * XE_WA - Out-of-band workarounds, to be queried and called as needed.
+ * XE_GT_WA - Out-of-band GT workarounds, to be queried and called as needed.
  * @gt__: gt instance
  * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
  */
-#define XE_WA(gt__, id__) ({ \
+#define XE_GT_WA(gt__, id__) ({ \
 	xe_gt_assert(gt__, (gt__)->wa_active.oob_initialized); \
 	test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \
 })
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index e990f20eccfe..338c344dcd7d 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,4 +1,6 @@
 1607983814	GRAPHICS_VERSION_RANGE(1200, 1210)
+16010904313	GRAPHICS_VERSION_RANGE(1200, 1210)
+18022495364	GRAPHICS_VERSION_RANGE(1200, 1210)
 22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
 14014475959	GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
 		PLATFORM(DG2)
@@ -30,7 +32,8 @@
 16022287689	GRAPHICS_VERSION(2001)
 		GRAPHICS_VERSION(2004)
 13011645652	GRAPHICS_VERSION(2004)
-		GRAPHICS_VERSION(3001)
+		GRAPHICS_VERSION_RANGE(3000, 3001)
+		GRAPHICS_VERSION(3003)
 14022293748	GRAPHICS_VERSION_RANGE(2001, 2002)
 		GRAPHICS_VERSION(2004)
 		GRAPHICS_VERSION_RANGE(3000, 3001)
@@ -46,7 +49,7 @@
 16023588340	GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
 14019789679	GRAPHICS_VERSION(1255)
 		GRAPHICS_VERSION_RANGE(1270, 2004)
-no_media_l3	MEDIA_VERSION(3000)
+no_media_l3	MEDIA_VERSION_RANGE(3000, 3002)
 14022866841	GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
 		MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
 16021333562	GRAPHICS_VERSION_RANGE(1200, 1274)
@@ -66,9 +69,16 @@ no_media_l3	MEDIA_VERSION(3000)
 		MEDIA_VERSION_RANGE(1300, 3000)
 		MEDIA_VERSION(3002)
 		GRAPHICS_VERSION(3003)
+14020001231	GRAPHICS_VERSION_RANGE(2001,2004), FUNC(xe_rtp_match_psmi_enabled)
+		MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled)
+		MEDIA_VERSION(3000), FUNC(xe_rtp_match_psmi_enabled)
+		MEDIA_VERSION(3002), FUNC(xe_rtp_match_psmi_enabled)
+16023683509	MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled)
+		MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_psmi_enabled)
 
 # SoC workaround - currently applies to all platforms with the following
 # primary GT GMDID
 14022085890	GRAPHICS_VERSION(2001)
 
 15015404425_disable	PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER)
+16026007364	MEDIA_VERSION(3000)
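
Editor's note on the xe interface changes above: the xe_vram.h hunk makes struct xe_vram_region an opaque, dynamically allocated object reached through accessor helpers, and the xe_wa.h hunk renames the out-of-band query macro to XE_GT_WA(). A minimal sketch of how an in-driver caller might consume the new accessors follows; the xe_report_vram_region() helper and its log message are illustrative only and are not part of this series.

/* Illustrative only: a hypothetical caller that uses the accessors added in
 * xe_vram.h instead of dereferencing struct xe_vram_region fields directly.
 */
static void xe_report_vram_region(struct xe_device *xe,
				  const struct xe_vram_region *vram)
{
	resource_size_t io_size = xe_vram_region_io_size(vram);
	resource_size_t usable = xe_vram_region_usable_size(vram);

	/* Each accessor returns 0 for a NULL region, so no separate NULL check. */
	if (!xe_vram_region_actual_physical_size(vram))
		return;

	if (io_size < usable)
		drm_info(&xe->drm, "Small BAR: %pa of %pa VRAM is CPU visible\n",
			 &io_size, &usable);
}

Callers of the workaround macro follow the same mechanical rename: a check that used to read XE_WA(gt, 22010954014) becomes XE_GT_WA(gt, 22010954014), as the xe_wa_apply_tile_workarounds() hunk shows.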