Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu'): 36 files changed, 807 insertions, 244 deletions
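A recurring change in this series is that amdgpu_sync_fence() now takes an explicit gfp_t, so callers inside fence-signalling paths can use GFP_NOWAIT while ordinary callers keep GFP_KERNEL. The sketch below illustrates that updated call pattern only; the wrapper function and its arguments are hypothetical, while the three-argument amdgpu_sync_fence() signature and the GFP_KERNEL/GFP_NOWAIT split follow the amdgpu_sync.h/.c and amdgpu_ids.c hunks in the diff.

```c
/* Illustrative sketch, not part of the patch: shows the new
 * amdgpu_sync_fence() signature with a gfp_t as the third argument.
 */
#include "amdgpu.h"
#include "amdgpu_sync.h"

static int example_track_fence(struct amdgpu_sync *sync,
			       struct dma_fence *fence,
			       bool in_signalling_path)
{
	/* Fence-signalling critical sections must not block on memory
	 * allocation, so they pass GFP_NOWAIT (as the VMID grab paths now
	 * do); everything else keeps using GFP_KERNEL.
	 */
	gfp_t flags = in_signalling_path ? GFP_NOWAIT : GFP_KERNEL;

	return amdgpu_sync_fence(sync, fence, flags);
}
```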
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2a9a41f4e748..6d83ccfa42ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1194,9 +1194,15 @@ struct amdgpu_device { bool debug_exp_resets; bool debug_disable_gpu_ring_reset; - bool enforce_isolation[MAX_XCP]; - /* Added this mutex for cleaner shader isolation between GFX and compute processes */ + /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; + bool enforce_isolation[MAX_XCP]; + struct amdgpu_isolation { + void *owner; + struct dma_fence *spearhead; + struct amdgpu_sync active; + struct amdgpu_sync prev; + } isolation[MAX_XCP]; struct amdgpu_init_level *init_lvl; @@ -1482,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev); struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, struct dma_fence *gang); +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_job *job); bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev); ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring); ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index ffd4c64e123c..dc47f5fd4ea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -391,6 +391,7 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev, { struct aca_bank_node *node; struct aca_bank *bank; + int r; if (!adev->cper.enabled) return; @@ -402,11 +403,27 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev, /* UEs must be encoded into separate CPER entries */ if (type == ACA_SMU_TYPE_UE) { + struct aca_banks de_banks; + + aca_banks_init(&de_banks); list_for_each_entry(node, &banks->list, node) { bank = &node->bank; - if (amdgpu_cper_generate_ue_record(adev, bank)) - dev_warn(adev->dev, "fail to generate ue cper records\n"); + if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) { + r = aca_banks_add_bank(&de_banks, bank); + if (r) + dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r); + } else { + if (amdgpu_cper_generate_ue_record(adev, bank)) + dev_warn(adev->dev, "fail to generate ue cper records\n"); + } + } + + if (!list_empty(&de_banks.list)) { + if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks)) + dev_warn(adev->dev, "fail to generate de cper records\n"); } + + aca_banks_release(&de_banks); } else { /* * SMU_TYPE_CE banks are combined into 1 CPER entries, @@ -541,6 +558,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h if (ret) return ret; + /* DEs may contain in CEs or UEs */ + if (type != ACA_ERROR_TYPE_DEFERRED) + aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data); + return aca_log_aca_error(handle, type, err_data); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 6f62e5d80ed6..6b180f1b33fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -76,11 +76,17 @@ struct ras_query_context; #define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ #define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ -#define ACA_BANK_ERR_CE_DE_DECODE(bank) \ - ((ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) 
|| \ - ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) ? \ - ACA_ERROR_TYPE_DEFERRED : \ - ACA_ERROR_TYPE_CE) +#define ACA_BANK_ERR_IS_DEFFERED(bank) \ + (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \ + ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) + +#define ACA_BANK_ERR_CE_DE_DECODE(bank) \ + (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ + ACA_ERROR_TYPE_CE) + +#define ACA_BANK_ERR_UE_DE_DECODE(bank) \ + (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ + ACA_ERROR_TYPE_UE) enum aca_reg_idx { ACA_REG_IDX_CTL = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2ac6d4fa0601..d2ec4130a316 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(sync, vm->last_update); + return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL); } static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) @@ -1249,7 +1249,7 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem, (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - (void)amdgpu_sync_fence(sync, bo_va->last_pt_update); + (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); return 0; } @@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; } - return amdgpu_sync_fence(sync, bo_va->last_pt_update); + return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); } static int map_bo_to_gpuvm(struct kgd_mem *mem, @@ -2913,7 +2913,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * } dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, fence) { - ret = amdgpu_sync_fence(&sync_obj, fence); + ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL); if (ret) { pr_debug("Memory eviction: Sync BO fence failed. 
Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 3f291b30b79f..360e07a5c7c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -455,10 +455,10 @@ calc: return umin(rec_len, chunk); } -void amdgpu_cper_ring_write(struct amdgpu_ring *ring, - void *src, int count) +void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) { u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + int rec_cnt_dw = count >> 2; u32 chunk, ent_sz; u8 *s = (u8 *)src; @@ -485,6 +485,9 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring, s += chunk; } + if (ring->count_dw < rec_cnt_dw) + ring->count_dw = 0; + /* the buffer is overflow, adjust rptr */ if (((wptr_old < rptr) && (rptr <= ring->wptr)) || ((ring->wptr < wptr_old) && (wptr_old < rptr)) || @@ -501,12 +504,10 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring, pos = rptr; } while (!amdgpu_cper_is_hdr(ring, rptr)); } - mutex_unlock(&ring->adev->cper.ring_lock); - if (ring->count_dw >= (count >> 2)) - ring->count_dw -= (count >> 2); - else - ring->count_dw = 0; + if (ring->count_dw >= rec_cnt_dw) + ring->count_dw -= rec_cnt_dw; + mutex_unlock(&ring->adev->cper.ring_lock); } static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5cc5f59e3018..82df06a72ee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -428,7 +428,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(&p->sync, fence); + r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); dma_fence_put(fence); if (r) return r; @@ -450,7 +450,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(&p->sync, fence); + r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); dma_fence_put(fence); return r; } @@ -1111,7 +1111,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct drm_gpu_scheduler *sched = entity->rq->sched; struct amdgpu_ring *ring = to_amdgpu_ring(sched); - if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) + if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) return -EINVAL; } } @@ -1124,7 +1124,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update, + GFP_KERNEL); if (r) return r; @@ -1135,7 +1136,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, + GFP_KERNEL); if (r) return r; } @@ -1154,7 +1156,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, + GFP_KERNEL); if (r) return r; } @@ -1167,7 +1170,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, vm->last_update); + r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL); if (r) return r; @@ -1248,7 +1251,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) continue; } - r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + r = 
amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence, + GFP_KERNEL); dma_fence_put(fence); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 17ce1677378e..6ebf6179064b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -227,6 +227,24 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); +static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) +{ + int ret = 0; + + if (!amdgpu_sriov_vf(adev)) + ret = sysfs_create_file(&adev->dev->kobj, + &dev_attr_pcie_replay_count.attr); + + return ret; +} + +static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev)) + sysfs_remove_file(&adev->dev->kobj, + &dev_attr_pcie_replay_count.attr); +} + static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) @@ -4172,11 +4190,6 @@ static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) } #endif -static const struct attribute *amdgpu_dev_attributes[] = { - &dev_attr_pcie_replay_count.attr, - NULL -}; - static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) { if (amdgpu_mcbp == 1) @@ -4281,7 +4294,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->gfx.reset_sem_mutex); /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ mutex_init(&adev->enforce_isolation_mutex); + for (i = 0; i < MAX_XCP; ++i) { + adev->isolation[i].spearhead = dma_fence_get_stub(); + amdgpu_sync_create(&adev->isolation[i].active); + amdgpu_sync_create(&adev->isolation[i].prev); + } mutex_init(&adev->gfx.kfd_sch_mutex); + mutex_init(&adev->gfx.workload_profile_mutex); + mutex_init(&adev->vcn.workload_profile_mutex); amdgpu_device_init_apu_flags(adev); @@ -4399,10 +4419,17 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; - /* Get rid of things like offb */ - r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name); - if (r) - return r; + /* + * No need to remove conflicting FBs for non-display class devices. + * This prevents the sysfb from being freed accidently. 
+ */ + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { + /* Get rid of things like offb */ + r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name); + if (r) + return r; + } /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -4613,7 +4640,7 @@ fence_driver_init: } else adev->ucode_sysfs_en = true; - r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); + r = amdgpu_device_attr_sysfs_init(adev); if (r) dev_err(adev->dev, "Could not create amdgpu device attr\n"); @@ -4750,7 +4777,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_pm_sysfs_fini(adev); if (adev->ucode_sysfs_en) amdgpu_ucode_sysfs_fini(adev); - sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); + amdgpu_device_attr_sysfs_fini(adev); amdgpu_fru_sysfs_fini(adev); amdgpu_reg_state_sysfs_fini(adev); @@ -4777,7 +4804,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) void amdgpu_device_fini_sw(struct amdgpu_device *adev) { - int idx; + int i, idx; bool px; amdgpu_device_ip_fini(adev); @@ -4785,6 +4812,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) amdgpu_ucode_release(&adev->firmware.gpu_info_fw); adev->accel_working = false; dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); + for (i = 0; i < MAX_XCP; ++i) { + dma_fence_put(adev->isolation[i].spearhead); + amdgpu_sync_free(&adev->isolation[i].active); + amdgpu_sync_free(&adev->isolation[i].prev); + } amdgpu_reset_fini(adev); @@ -4800,6 +4832,9 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) kfree(adev->fru_info); adev->fru_info = NULL; + kfree(adev->xcp_mgr); + adev->xcp_mgr = NULL; + px = amdgpu_device_supports_px(adev_to_drm(adev)); if (px || (!dev_is_removable(&adev->pdev->dev) && @@ -5331,6 +5366,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) amdgpu_ras_resume(adev); @@ -6903,22 +6939,117 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, { struct dma_fence *old = NULL; + dma_fence_get(gang); do { dma_fence_put(old); old = amdgpu_device_get_gang(adev); if (old == gang) break; - if (!dma_fence_is_signaled(old)) + if (!dma_fence_is_signaled(old)) { + dma_fence_put(gang); return old; + } } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, old, gang) != old); + /* + * Drop it once for the exchanged reference in adev and once for the + * thread local reference acquired in amdgpu_device_get_gang(). + */ + dma_fence_put(old); dma_fence_put(old); return NULL; } +/** + * amdgpu_device_enforce_isolation - enforce HW isolation + * @adev: the amdgpu device pointer + * @ring: the HW ring the job is supposed to run on + * @job: the job which is about to be pushed to the HW ring + * + * Makes sure that only one client at a time can use the GFX block. + * Returns: The dependency to wait on before the job can be pushed to the HW. + * The function is called multiple times until NULL is returned. 
+ */ +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_job *job) +{ + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; + struct drm_sched_fence *f = job->base.s_fence; + struct dma_fence *dep; + void *owner; + int r; + + /* + * For now enforce isolation only for the GFX block since we only need + * the cleaner shader on those rings. + */ + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX && + ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) + return NULL; + + /* + * All submissions where enforce isolation is false are handled as if + * they come from a single client. Use ~0l as the owner to distinct it + * from kernel submissions where the owner is NULL. + */ + owner = job->enforce_isolation ? f->owner : (void *)~0l; + + mutex_lock(&adev->enforce_isolation_mutex); + + /* + * The "spearhead" submission is the first one which changes the + * ownership to its client. We always need to wait for it to be + * pushed to the HW before proceeding with anything. + */ + if (&f->scheduled != isolation->spearhead && + !dma_fence_is_signaled(isolation->spearhead)) { + dep = isolation->spearhead; + goto out_grab_ref; + } + + if (isolation->owner != owner) { + + /* + * Wait for any gang to be assembled before switching to a + * different owner or otherwise we could deadlock the + * submissions. + */ + if (!job->gang_submit) { + dep = amdgpu_device_get_gang(adev); + if (!dma_fence_is_signaled(dep)) + goto out_return_dep; + dma_fence_put(dep); + } + + dma_fence_put(isolation->spearhead); + isolation->spearhead = dma_fence_get(&f->scheduled); + amdgpu_sync_move(&isolation->active, &isolation->prev); + trace_amdgpu_isolation(isolation->owner, owner); + isolation->owner = owner; + } + + /* + * Specifying the ring here helps to pipeline submissions even when + * isolation is enabled. If that is not desired for testing NULL can be + * used instead of the ring to enforce a CPU round trip while switching + * between clients. 
+ */ + dep = amdgpu_sync_peek_fence(&isolation->prev, ring); + r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT); + if (r) + DRM_WARN("OOM tracking isolation\n"); + +out_grab_ref: + dma_fence_get(dep); +out_return_dep: + mutex_unlock(&adev->enforce_isolation_mutex); + return dep; +} + bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) { switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 967a992829bd..dc2713ec95a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -113,8 +113,13 @@ #include "amdgpu_isp.h" #endif -#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" -MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); +MODULE_FIRMWARE("amdgpu/ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin"); #define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 @@ -297,21 +302,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, return ret; } -static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary) +static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, + uint8_t *binary, + const char *fw_name) { const struct firmware *fw; - const char *fw_name; int r; - switch (amdgpu_discovery) { - case 2: - fw_name = FIRMWARE_IP_DISCOVERY; - break; - default: - dev_warn(adev->dev, "amdgpu_discovery is not set properly\n"); - return -EINVAL; - } - r = request_firmware(&fw, fw_name, adev->dev); if (r) { dev_err(adev->dev, "can't load firmware \"%s\"\n", @@ -404,10 +401,39 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev, return 0; } +static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev) +{ + if (amdgpu_discovery == 2) + return "amdgpu/ip_discovery.bin"; + + switch (adev->asic_type) { + case CHIP_VEGA10: + return "amdgpu/vega10_ip_discovery.bin"; + case CHIP_VEGA12: + return "amdgpu/vega12_ip_discovery.bin"; + case CHIP_RAVEN: + if (adev->apu_flags & AMD_APU_IS_RAVEN2) + return "amdgpu/raven2_ip_discovery.bin"; + else if (adev->apu_flags & AMD_APU_IS_PICASSO) + return "amdgpu/picasso_ip_discovery.bin"; + else + return "amdgpu/raven_ip_discovery.bin"; + case CHIP_VEGA20: + return "amdgpu/vega20_ip_discovery.bin"; + case CHIP_ARCTURUS: + return "amdgpu/arcturus_ip_discovery.bin"; + case CHIP_ALDEBARAN: + return "amdgpu/aldebaran_ip_discovery.bin"; + default: + return NULL; + } +} + static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; struct binary_header *bhdr; + const char *fw_name; uint16_t offset; uint16_t size; uint16_t checksum; @@ -419,9 +445,10 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) return -ENOMEM; /* Read from file if it is the preferred option */ - if (amdgpu_discovery == 2) { + fw_name = amdgpu_discovery_get_fw_name(adev); + if (fw_name != NULL) { dev_info(adev->dev, "use ip discovery information from file"); - r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); + r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); if (r) { dev_err(adev->dev, "failed to read ip discovery binary from file\n"); @@ 
-1290,6 +1317,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) uint16_t die_offset; uint16_t ip_offset; uint16_t num_dies; + uint32_t wafl_ver; uint16_t num_ips; uint16_t hw_id; uint8_t inst; @@ -1303,6 +1331,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) return r; } + wafl_ver = 0; adev->gfx.xcc_mask = 0; adev->sdma.sdma_mask = 0; adev->vcn.inst_mask = 0; @@ -1403,6 +1432,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->gfx.xcc_mask |= (1U << ip->instance_number); + if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID) + wafl_ver = IP_VERSION_FULL(ip->major, ip->minor, + ip->revision, 0, 0); + for (k = 0; k < num_base_address; k++) { /* * convert the endianness of base addresses in place, @@ -1468,6 +1501,9 @@ next_ip: } } + if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0]) + adev->ip_versions[XGMI_HWIP][0] = wafl_ver; + return 0; } @@ -2511,6 +2547,38 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_RAVEN: + case CHIP_VEGA20: + case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: + /* this is not fatal. We have a fallback below + * if the new firmwares are not present. some of + * this will be overridden below to keep things + * consistent with the current behavior. + */ + r = amdgpu_discovery_reg_base_init(adev); + if (!r) { + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + } + break; + default: + r = amdgpu_discovery_reg_base_init(adev); + if (r) + return -EINVAL; + + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + break; + } + + switch (adev->asic_type) { + case CHIP_VEGA10: vega10_reg_base_init(adev); adev->sdma.num_instances = 2; adev->gmc.num_umc = 4; @@ -2673,14 +2741,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0); break; default: - r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; - - amdgpu_discovery_harvest_ip(adev); - amdgpu_discovery_get_gfx_info(adev); - amdgpu_discovery_get_mall_info(adev); - amdgpu_discovery_get_vcn_info(adev); break; } @@ -2772,10 +2832,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; } - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); - /* set NBIO version */ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 653f2bc77530..23cfce5aa1fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -139,6 +139,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), + AMDGPU_DEBUG_SMU_POOL = BIT(7), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -176,6 +177,7 @@ uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; bool enforce_isolation; +int amdgpu_modeset = -1; /* Specifies the default granularity for SVM, used in buffer * migration and restoration of backing memory when handling @@ -1038,6 +1040,13 @@ 
module_param(enforce_isolation, bool, 0444); MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); /** + * DOC: modeset (int) + * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto). + */ +MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)"); +module_param_named(modeset, amdgpu_modeset, int, 0444); + +/** * DOC: seamless (int) * Seamless boot will keep the image on the screen during the boot process. */ @@ -1053,6 +1062,11 @@ module_param_named(seamless, amdgpu_seamless, int, 0444); * limits the VRAM size reported to ROCm applications to the visible * size, usually 256MB. * - 0x4: Disable GPU soft recovery, always do a full reset + * - 0x8: Use VRAM for firmware loading + * - 0x10: Enable ACA based RAS logging + * - 0x20: Enable experimental resets + * - 0x40: Disable ring resets + * - 0x80: Use VRAM for SMU pool */ MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444); @@ -2230,6 +2244,10 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: ring reset disabled\n"); adev->debug_disable_gpu_ring_reset = true; } + if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) { + pr_info("debug: use vram for smu pool\n"); + adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2257,6 +2275,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, int ret, retry = 0, i; bool supports_atomic = false; + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { + if (drm_firmware_drivers_only() && amdgpu_modeset == -1) + return -EINVAL; + } + /* skip devices which are owned by radeon */ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { if (amdgpu_unsupported_pciidlist[i] == pdev->device) @@ -2990,9 +3014,6 @@ static int __init amdgpu_init(void) { int r; - if (drm_firmware_drivers_only()) - return -EINVAL; - r = amdgpu_sync_init(); if (r) goto error_sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 984e6ff6e463..72af5e5a894a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1665,15 +1665,8 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) { - if (adev->enforce_isolation[i] && !partition_values[i]) - /* Going from enabled to disabled */ - amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); - else if (!adev->enforce_isolation[i] && partition_values[i]) - /* Going from disabled to enabled */ - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + for (i = 0; i < num_partitions; i++) adev->enforce_isolation[i] = partition_values[i]; - } mutex_unlock(&adev->enforce_isolation_mutex); amdgpu_mes_update_enforce_isolation(adev); @@ -2160,11 +2153,16 @@ void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work) for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) { - r = amdgpu_dpm_switch_power_profile(adev, profile, false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, - profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? 
- "fullscreen 3D" : "compute"); + mutex_lock(&adev->gfx.workload_profile_mutex); + if (adev->gfx.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, profile, false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, + profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? + "fullscreen 3D" : "compute"); + adev->gfx.workload_profile_active = false; + } + mutex_unlock(&adev->gfx.workload_profile_mutex); } else { schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); } @@ -2183,13 +2181,25 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring) atomic_inc(&adev->gfx.total_submission_cnt); - if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) { + cancel_delayed_work_sync(&adev->gfx.idle_work); + + /* We can safely return early here because we've cancelled the + * the delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true. + */ + if (adev->gfx.workload_profile_active) + return; + + mutex_lock(&adev->gfx.workload_profile_mutex); + if (!adev->gfx.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, profile, true); if (r) dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? "fullscreen 3D" : "compute"); + adev->gfx.workload_profile_active = true; } + mutex_unlock(&adev->gfx.workload_profile_mutex); } void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 4b1675f79caa..87e862188766 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -482,6 +482,8 @@ struct amdgpu_gfx { atomic_t total_submission_cnt; struct delayed_work idle_work; + bool workload_profile_active; + struct mutex workload_profile_mutex; }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 4eefa17fa39b..464625282872 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -573,6 +573,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0}; unsigned i; unsigned vmhub, inv_eng; + struct amdgpu_ring *shared_ring; /* init the vm inv eng for all vmhubs */ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { @@ -595,6 +596,10 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring == &adev->cper.ring_buf) continue; + /* Skip if the ring is a shared ring */ + if (amdgpu_sdma_is_shared_inv_eng(adev, ring)) + continue; + inv_eng = ffs(vm_inv_engs[vmhub]); if (!inv_eng) { dev_err(adev->dev, "no VM inv eng for ring %s\n", @@ -607,6 +612,21 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", ring->name, ring->vm_inv_eng, ring->vm_hub); + /* SDMA has a special packet which allows it to use the same + * invalidation engine for all the rings in one instance. + * Therefore, we do not allocate a separate VM invalidation engine + * for SDMA page rings. Instead, they share the VM invalidation + * engine with the SDMA gfx ring. This change ensures efficient + * resource management and avoids the issue of insufficient VM + * invalidation engines. 
+ */ + shared_ring = amdgpu_sdma_get_shared_ring(adev, ring); + if (shared_ring) { + shared_ring->vm_inv_eng = ring->vm_inv_eng; + dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n", + ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub); + continue; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 8e712a11aba5..4c4e087230ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -209,7 +209,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring, return 0; } - fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); + fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT); if (!fences) return -ENOMEM; @@ -287,46 +287,34 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, (*id)->flushed_updates < updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && - !dma_fence_is_signaled((*id)->last_flush))) { + !dma_fence_is_signaled((*id)->last_flush))) + needs_flush = true; + + if ((*id)->owner != vm->immediate.fence_context || + (!adev->vm_manager.concurrent_flush && needs_flush)) { struct dma_fence *tmp; - /* Wait for the gang to be assembled before using a - * reserved VMID or otherwise the gang could deadlock. + /* Don't use per engine and per process VMID at the + * same time */ - tmp = amdgpu_device_get_gang(adev); - if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) { + if (adev->vm_manager.concurrent_flush) + ring = NULL; + + /* to prevent one context starved by another context */ + (*id)->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); + if (tmp) { *id = NULL; - *fence = tmp; + *fence = dma_fence_get(tmp); return 0; } - dma_fence_put(tmp); - - /* Make sure the id is owned by the gang before proceeding */ - if (!job->gang_submit || - (*id)->owner != vm->immediate.fence_context) { - - /* Don't use per engine and per process VMID at the - * same time - */ - if (adev->vm_manager.concurrent_flush) - ring = NULL; - - /* to prevent one context starved by another context */ - (*id)->pd_gpu_addr = 0; - tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); - if (tmp) { - *id = NULL; - *fence = dma_fence_get(tmp); - return 0; - } - } - needs_flush = true; } /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished); + r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, + GFP_NOWAIT); if (r) return r; @@ -385,7 +373,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, * user of the VMID. 
*/ r = amdgpu_sync_fence(&(*id)->active, - &job->base.s_fence->finished); + &job->base.s_fence->finished, + GFP_NOWAIT); if (r) return r; @@ -422,7 +411,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) { + if (amdgpu_vmid_uses_reserved(vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -437,7 +426,8 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* Remember this submission as user of the VMID */ r = amdgpu_sync_fence(&id->active, - &job->base.s_fence->finished); + &job->base.s_fence->finished, + GFP_NOWAIT); if (r) goto error; @@ -474,19 +464,14 @@ error: /* * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID - * @adev: amdgpu_device pointer * @vm: the VM to check * @vmhub: the VMHUB which will be used * * Returns: True if the VM will use a reserved VMID. */ -bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, unsigned int vmhub) +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) { - return vm->reserved_vmid[vmhub] || - (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ? - vm->root.bo->xcp_id : 0] && - AMDGPU_IS_GFXHUB(vmhub)); + return vm->reserved_vmid[vmhub]; } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 4012fb2dd08a..240fa6751260 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,8 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); -bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, unsigned int vmhub); +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 935df2cdcc16..acb21fc8b3ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -361,17 +361,24 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, { struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched); struct amdgpu_job *job = to_amdgpu_job(sched_job); - struct dma_fence *fence = NULL; + struct dma_fence *fence; int r; r = drm_sched_entity_error(s_entity); if (r) goto error; - if (job->gang_submit) + if (job->gang_submit) { fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + if (fence) + return fence; + } + + fence = amdgpu_device_enforce_isolation(ring->adev, ring, job); + if (fence) + return fence; - if (!fence && job->vm && !job->vmid) { + if (job->vm && !job->vmid) { r = amdgpu_vmid_grab(job->vm, ring, job, &fence); if (r) { dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); @@ -384,9 +391,10 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, */ if (!fence) job->vm = NULL; + return fence; } - return fence; + return NULL; error: dma_fence_set_error(&job->base.s_fence->finished, r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 709c11cbeabd..85f774063f9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -145,9 +145,8 
@@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.vmid_mask_gfxhub = 0xffffff00; for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { - /* use only 1st MEC pipes */ - if (i >= adev->gfx.mec.num_pipe_per_mec) - continue; + if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) + break; adev->mes.compute_hqd_mask[i] = 0xc; } @@ -155,14 +154,9 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < - IP_VERSION(6, 0, 0)) - adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; - /* zero sdma_hqd_mask for non-existent engine */ - else if (adev->sdma.num_instances == 1) - adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc; - else - adev->mes.sdma_hqd_mask[i] = 0xfc; + if (i >= adev->sdma.num_instances) + break; + adev->mes.sdma_hqd_mask[i] = 0xfc; } for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { @@ -1336,14 +1330,14 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, DRM_ERROR("failed to do vm_bo_update on meta data\n"); goto error_del_bo_va; } - amdgpu_sync_fence(&sync, bo_va->last_pt_update); + amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL); r = amdgpu_vm_update_pdes(adev, vm, false); if (r) { DRM_ERROR("failed to update pdes on meta data\n"); goto error_del_bo_va; } - amdgpu_sync_fence(&sync, vm->last_update); + amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); amdgpu_sync_wait(&sync, false); drm_exec_fini(&exec); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 0f2eb69ad715..d54bb1377262 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -153,6 +153,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp) adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA; ret = psp_init_cap_microcode(psp, ucode_prefix); break; + case IP_VERSION(13, 0, 12): + ret = psp_init_ta_microcode(psp, ucode_prefix); + break; default: return -EINVAL; } @@ -1861,6 +1864,7 @@ int psp_ras_initialize(struct psp_context *psp) if (adev->gmc.gmc_funcs->query_mem_partition_mode) ras_cmd->ras_in_message.init_flags.nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask; ret = psp_ta_load(psp, &psp->ras_context.context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index cfec29835634..bed2603ae4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3473,6 +3473,13 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) adev, control->bad_channel_bitmap); con->update_channel_flag = false; } + + /* The format action is only applied to new ASICs */ + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 && + control->tbl_hdr.version < RAS_TABLE_VER_V3) + if (!amdgpu_ras_eeprom_reset_table(control)) + if (amdgpu_ras_save_bad_pages(adev, NULL)) + dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n"); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 3597ecd9baca..0ea7cfaf3587 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -161,6 +161,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): return true; case IP_VERSION(13, 0, 6): + case 
IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): return (adev->gmc.is_app_apu) ? false : true; default: @@ -223,6 +224,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return true; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): control->i2c_address = EEPROM_I2C_MADDR_4; return true; @@ -413,9 +415,11 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(8, 10, 0): - case IP_VERSION(12, 0, 0): hdr->version = RAS_TABLE_VER_V2_1; return; + case IP_VERSION(12, 0, 0): + hdr->version = RAS_TABLE_VER_V3; + return; default: hdr->version = RAS_TABLE_VER_V1; return; @@ -443,7 +447,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) hdr->header = RAS_TABLE_HDR_VAL; amdgpu_ras_set_eeprom_table_version(control); - if (hdr->version == RAS_TABLE_VER_V2_1) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { hdr->first_rec_offset = RAS_RECORD_START_V2_1; hdr->tbl_size = RAS_TABLE_HEADER_SIZE + RAS_TABLE_V2_1_INFO_SIZE; @@ -461,7 +465,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) } csum = __calc_hdr_byte_sum(control); - if (hdr->version == RAS_TABLE_VER_V2_1) + if (hdr->version >= RAS_TABLE_VER_V2_1) csum += __calc_ras_info_byte_sum(control); csum = -csum; hdr->checksum = csum; @@ -757,7 +761,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) "Saved bad pages %d reaches threshold value %d\n", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); control->tbl_hdr.header = RAS_TABLE_HDR_BAD; - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) { + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; control->tbl_rai.health_percent = 0; } @@ -770,7 +774,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) amdgpu_dpm_send_rma_reason(adev); } - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + RAS_TABLE_V2_1_INFO_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; @@ -810,7 +814,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) * now calculate gpu health percent */ if (amdgpu_bad_page_threshold != 0 && - control->tbl_hdr.version == RAS_TABLE_VER_V2_1 && + control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 && control->ras_num_bad_pages <= ras->bad_page_cnt_threshold) control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold - control->ras_num_bad_pages) * 100) / @@ -823,7 +827,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) csum += *pp; csum += __calc_hdr_byte_sum(control); - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) csum += __calc_ras_info_byte_sum(control); /* avoid sign extension when assigning to "checksum" */ csum = -csum; @@ -1040,7 +1044,7 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co /* get available eeprom table version first before eeprom table init */ amdgpu_ras_set_eeprom_table_version(control); - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) return RAS_MAX_RECORD_COUNT_V2_1; else return RAS_MAX_RECORD_COUNT; @@ -1285,7 +1289,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control 
int buf_size, res; u8 csum, *buf, *pp; - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) buf_size = RAS_TABLE_HEADER_SIZE + RAS_TABLE_V2_1_INFO_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; @@ -1388,7 +1392,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) __decode_table_header_from_buf(hdr, buf); - if (hdr->version == RAS_TABLE_VER_V2_1) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); control->ras_record_offset = RAS_RECORD_START_V2_1; control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; @@ -1428,7 +1432,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", control->ras_num_bad_pages); - if (hdr->version == RAS_TABLE_VER_V2_1) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { res = __read_table_ras_info(control); if (res) return res; @@ -1448,7 +1452,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) ras->bad_page_cnt_threshold); } else if (hdr->header == RAS_TABLE_HDR_BAD && amdgpu_bad_page_threshold != 0) { - if (hdr->version == RAS_TABLE_VER_V2_1) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { res = __read_table_ras_info(control); if (res) return res; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 13f7eda9a696..ec6d7ea37ad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -28,6 +28,7 @@ #define RAS_TABLE_VER_V1 0x00010000 #define RAS_TABLE_VER_V2_1 0x00021000 +#define RAS_TABLE_VER_V3 0x00030000 struct amdgpu_device; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b4fd1e17205e..bb2b66385223 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -37,7 +37,7 @@ struct amdgpu_job; struct amdgpu_vm; /* max number of rings */ -#define AMDGPU_MAX_RINGS 133 +#define AMDGPU_MAX_RINGS 149 #define AMDGPU_MAX_HWIP_RINGS 64 #define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_SW_GFX_RINGS 2 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 3a4cef896018..529c9696c2f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -504,6 +504,39 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev) } } +struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + if (adev->sdma.has_page_queue && + (ring->me < adev->sdma.num_instances) && + (ring == &adev->sdma.instance[ring->me].ring)) + return &adev->sdma.instance[ring->me].page; + else + return NULL; +} + +/** +* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine +* @adev: Pointer to the AMDGPU device structure +* @ring: Pointer to the ring structure to check +* +* This function checks if the given ring is an SDMA ring that shares a VM invalidation engine. +* It returns true if the ring is such an SDMA ring, false otherwise. 
+*/ +bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + int i = ring->me; + + if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances) + return false; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + return (ring == &adev->sdma.instance[i].page); + else + return false; +} + /** * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions @@ -532,7 +565,6 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct * amdgpu_sdma_reset_engine - Reset a specific SDMA engine * @adev: Pointer to the AMDGPU device * @instance_id: ID of the SDMA engine instance to reset - * @suspend_user_queues: check if suspend user queue. * * This function performs the following steps: * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state. @@ -541,22 +573,16 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct * * Returns: 0 on success, or a negative error code on failure. */ -int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues) +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) { struct sdma_on_reset_funcs *funcs; int ret = 0; - struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];; + struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; struct amdgpu_ring *gfx_ring = &sdma_instance->ring; struct amdgpu_ring *page_ring = &sdma_instance->page; bool gfx_sched_stopped = false, page_sched_stopped = false; - /* Suspend KFD if suspend_user_queues is true. - * prevent the destruction of in-flight healthy user queue packets and - * avoid race conditions between KFD and KGD during the reset process. - */ - if (suspend_user_queues) - amdgpu_amdkfd_suspend(adev, false); - + mutex_lock(&sdma_instance->engine_reset_mutex); /* Stop the scheduler's work queue for the GFX and page rings if they are running. * This ensures that no new tasks are submitted to the queues while * the reset is in progress. @@ -609,7 +635,7 @@ exit: * if they were stopped by this function. This allows new tasks * to be submitted to the queues after the reset is complete. 
*/ - if (ret) { + if (!ret) { if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) { drm_sched_wqueue_start(&gfx_ring->sched); } @@ -617,9 +643,7 @@ exit: drm_sched_wqueue_start(&page_ring->sched); } } - - if (suspend_user_queues) - amdgpu_amdkfd_resume(adev, false); + mutex_unlock(&sdma_instance->engine_reset_mutex); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 965169320065..47d56fd0589f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -64,6 +64,11 @@ struct amdgpu_sdma_instance { struct amdgpu_bo *sdma_fw_obj; uint64_t sdma_fw_gpu_addr; uint32_t *sdma_fw_ptr; + struct mutex engine_reset_mutex; + /* track guilty state of GFX and PAGE queues */ + bool gfx_guilty; + bool page_guilty; + }; enum amdgpu_sdma_ras_memory_id { @@ -126,9 +131,6 @@ struct amdgpu_sdma { uint32_t *ip_dump; uint32_t supported_reset; struct list_head reset_callback_list; - /* track guilty state of GFX and PAGE queues */ - bool gfx_guilty; - bool page_guilty; }; /* @@ -169,7 +171,7 @@ struct amdgpu_buffer_funcs { }; void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs); -int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues); +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) @@ -194,4 +196,7 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev); int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev); void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev); +bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring); +struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586ab4c911b..5576ed0b508f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -135,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) struct amdgpu_sync_entry *e; hash_for_each_possible(sync->fences, e, node, f->context) { - if (unlikely(e->fence->context != f->context)) - continue; + if (dma_fence_is_signaled(e->fence)) { + dma_fence_put(e->fence); + e->fence = dma_fence_get(f); + return true; + } - amdgpu_sync_keep_later(&e->fence, f); - return true; + if (likely(e->fence->context == f->context)) { + amdgpu_sync_keep_later(&e->fence, f); + return true; + } } return false; } @@ -149,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) * * @sync: sync object to add fence to * @f: fence to sync to + * @flags: memory allocation flags to use when allocating sync entry * * Add the fence to the sync object. 
*/ -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, + gfp_t flags) { struct amdgpu_sync_entry *e; @@ -162,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) if (amdgpu_sync_add_later(sync, f)) return 0; - e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); + e = kmem_cache_alloc(amdgpu_sync_slab, flags); if (!e) return -ENOMEM; @@ -249,7 +256,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_fence *tmp = dma_fence_chain_contained(f); if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) { - r = amdgpu_sync_fence(sync, f); + r = amdgpu_sync_fence(sync, f, GFP_KERNEL); dma_fence_put(f); if (r) return r; @@ -281,7 +288,7 @@ int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv) if (fence_owner != AMDGPU_FENCE_OWNER_KFD) continue; - r = amdgpu_sync_fence(sync, f); + r = amdgpu_sync_fence(sync, f, GFP_KERNEL); if (r) break; } @@ -388,7 +395,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) hash_for_each_safe(source->fences, i, tmp, e, node) { f = e->fence; if (!dma_fence_is_signaled(f)) { - r = amdgpu_sync_fence(clone, f); + r = amdgpu_sync_fence(clone, f, GFP_KERNEL); if (r) return r; } else { @@ -400,6 +407,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) } /** + * amdgpu_sync_move - move all fences from src to dst + * + * @src: source of the fences, empty after function + * @dst: destination for the fences + * + * Moves all fences from source to destination. All fences in destination are + * freed and source is empty after the function call. + */ +void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst) +{ + unsigned int i; + + amdgpu_sync_free(dst); + + for (i = 0; i < HASH_SIZE(src->fences); ++i) + hlist_move_list(&src->fences[i], &dst->fences[i]); +} + +/** * amdgpu_sync_push_to_job - push fences into job * @sync: sync object to get the fences from * @job: job to push the fences into diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index e3272dce798d..51eb4382c91e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -47,7 +47,8 @@ struct amdgpu_sync { }; void amdgpu_sync_create(struct amdgpu_sync *sync); -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, + gfp_t flags); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); @@ -56,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); +void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst); int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 383fce40d4dd..11dd2e0f7979 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -457,6 +457,38 @@ DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed, TP_ARGS(pasid) ); 
+TRACE_EVENT(amdgpu_isolation, + TP_PROTO(void *prev, void *next), + TP_ARGS(prev, next), + TP_STRUCT__entry( + __field(void *, prev) + __field(void *, next) + ), + + TP_fast_assign( + __entry->prev = prev; + __entry->next = next; + ), + TP_printk("prev=%p, next=%p", + __entry->prev, + __entry->next) +); + +TRACE_EVENT(amdgpu_cleaner_shader, + TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence), + TP_ARGS(ring, fence), + TP_STRUCT__entry( + __string(ring, ring->name) + __field(u64, seqno) + ), + + TP_fast_assign( + __assign_str(ring); + __entry->seqno = fence->seqno; + ), + TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno) +); + TRACE_EVENT(amdgpu_bo_list_set, TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), TP_ARGS(list, bo), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 8d8b39e6d197..1991dd3d1056 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -438,10 +438,15 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + mutex_lock(&adev->vcn.workload_profile_mutex); + if (adev->vcn.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + adev->vcn.workload_profile_active = false; + } + mutex_unlock(&adev->vcn.workload_profile_mutex); } else { schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); } @@ -455,13 +460,26 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) atomic_inc(&vcn_inst->total_submission_cnt); - if (!cancel_delayed_work_sync(&vcn_inst->idle_work)) { + cancel_delayed_work_sync(&vcn_inst->idle_work); + + /* We can safely return early here because we've cancelled the + * the delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true. 
+ */ + if (adev->vcn.workload_profile_active) + goto pg_lock; + + mutex_lock(&adev->vcn.workload_profile_mutex); + if (!adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); + true); if (r) dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + adev->vcn.workload_profile_active = true; } + mutex_unlock(&adev->vcn.workload_profile_mutex); +pg_lock: mutex_lock(&vcn_inst->vcn_pg_lock); vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 26c9c2d90f45..cdcdae7f71ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -358,6 +358,9 @@ struct amdgpu_vcn { bool per_inst_fw; unsigned fw_version; + + bool workload_profile_active; + struct mutex workload_profile_mutex; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1c0fd95c3820..ce52b4d75e94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -754,6 +754,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; @@ -761,8 +762,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool gds_switch_needed = ring->funcs->emit_gds_switch && job->gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush; - struct dma_fence *fence = NULL; + bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; + struct dma_fence *fence = NULL; unsigned int patch; int r; @@ -785,8 +787,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; + cleaner_shader_needed = adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && job->base.s_fence && + &job->base.s_fence->scheduled == isolation->spearhead; + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && - !(job->enforce_isolation && !job->vmid)) + !cleaner_shader_needed) return 0; amdgpu_ring_ib_begin(ring); @@ -797,9 +803,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); - if (adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader && - job->enforce_isolation) + if (cleaner_shader_needed) ring->funcs->emit_cleaner_shader(ring); if (vm_flush_needed) { @@ -821,7 +825,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, job->oa_size); } - if (vm_flush_needed || pasid_mapping_needed) { + if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; @@ -843,6 +847,18 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, id->pasid_mapping = dma_fence_get(fence); mutex_unlock(&id_mgr->lock); } + + /* + * Make sure that all other submissions wait for the cleaner shader to + * finish before we push them to the HW. 
+ */ + if (cleaner_shader_needed) { + trace_amdgpu_cleaner_shader(ring, fence); + mutex_lock(&adev->enforce_isolation_mutex); + dma_fence_put(isolation->spearhead); + isolation->spearhead = dma_fence_get(fence); + mutex_unlock(&adev->enforce_isolation_mutex); + } dma_fence_put(fence); amdgpu_ring_patch_cond_exec(ring, patch); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b9f6d89dafb2..d8772cd6db63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1626,6 +1626,20 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 26 && + adev->mes.fw_version[0] >= 114) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index da364e04f09c..dceb5ad38862 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -2637,7 +2637,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - u32 i; /* Set the write pointer delay */ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); @@ -2692,12 +2691,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v12_0_cp_gfx_start(adev); - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } - return 0; } @@ -3037,10 +3030,6 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) if (r) goto done; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } done: return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index efe45e4edfd7..736398b0d16d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -867,9 +867,8 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, switch (type) { case ACA_SMU_TYPE_UE: - bank->aca_err_type = ACA_ERROR_TYPE_UE; - ret = aca_error_cache_log_bank_error(handle, &info, - ACA_ERROR_TYPE_UE, 1ULL); + bank->aca_err_type = ACA_BANK_ERR_UE_DE_DECODE(bank); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL); break; case ACA_SMU_TYPE_CE: bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index fd34dc138081..dc94d58d33a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -31,6 +31,7 @@ #include "amdgpu_ucode.h" #include "amdgpu_trace.h" #include "amdgpu_reset.h" +#include "gc/gc_9_0_sh_mask.h" #include "sdma/sdma_4_4_2_offset.h" #include "sdma/sdma_4_4_2_sh_mask.h" @@ -672,12 +673,11 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) * @adev: amdgpu_device pointer * @i: instance to resume * @restore: used to restore wptr when restart - * @guilty: boolean 
indicating whether this queue is the guilty one (caused the timeout/error) * * Set up the gfx DMA ring buffers and enable them. * Returns 0 for success, error for failure. */ -static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore, bool guilty) +static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -714,7 +714,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, b /* For the guilty queue, set RPTR to the current wptr to skip bad commands, * It is not a guilty queue, restore cache_rptr and continue execution. */ - if (guilty) + if (adev->sdma.instance[i].gfx_guilty) rwptr = ring->wptr; else rwptr = ring->cached_rptr; @@ -779,12 +779,11 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, b * @adev: amdgpu_device pointer * @i: instance to resume * @restore: boolean to say restore needed or not - * @guilty: boolean indicating whether this queue is the guilty one (caused the timeout/error) * * Set up the page DMA ring buffers and enable them. * Returns 0 for success, error for failure. */ -static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore, bool guilty) +static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].page; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -803,7 +802,7 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, /* For the guilty queue, set RPTR to the current wptr to skip bad commands, * It is not a guilty queue, restore cache_rptr and continue execution. */ - if (guilty) + if (adev->sdma.instance[i].page_guilty) rwptr = ring->wptr; else rwptr = ring->cached_rptr; @@ -989,9 +988,9 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, uint32_t temp; WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); - sdma_v4_4_2_gfx_resume(adev, i, restore, adev->sdma.gfx_guilty); + sdma_v4_4_2_gfx_resume(adev, i, restore); if (adev->sdma.has_page_queue) - sdma_v4_4_2_page_resume(adev, i, restore, adev->sdma.page_guilty); + sdma_v4_4_2_page_resume(adev, i, restore); /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); @@ -1292,21 +1291,71 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) seq, 0xffffffff, 4); } - -/** - * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA +/* + * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value + * @vmid: The VMID to invalidate + * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight) * - * @ring: amdgpu_ring pointer - * @vmid: vmid number to use - * @pd_addr: address + * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID + * and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and + * L2 PDEs) are invalidated. 
+ */ +static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +/* + * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA + * @ring: The SDMA ring + * @vmid: The VMID to flush + * @pd_addr: The page directory address * - * Update the page table base and flush the VM TLB - * using sDMA. + * This function emits the necessary register writes and waits to perform a VM flush for the + * specified VMID. It updates the PTB address registers and issues a VM invalidation request + * using the specified VM invalidation engine. */ static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + struct amdgpu_device *adev = ring->adev; + uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0); + unsigned int eng = ring->vm_inv_eng; + struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + /* + * Construct and emit the VM invalidation packet + */ + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) | + SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) | + SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) | + SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) | + SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng)); + amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid))); } static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1445,6 +1494,11 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); + /* Initialize guilty flags for GFX and PAGE queues */ + adev->sdma.instance[i].gfx_guilty = false; + adev->sdma.instance[i].page_guilty = false; + ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1506,9 +1560,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; - /* Initialize guilty flags for GFX and PAGE queues */ - adev->sdma.gfx_guilty = false; - adev->sdma.page_guilty = false; return r; } @@ -1666,7 +1717,16 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 id = GET_INST(SDMA0, ring->me); - return amdgpu_sdma_reset_engine(adev, id, true); + int r; + + if (!(adev->sdma.supported_reset & 
AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + amdgpu_amdkfd_suspend(adev, false); + r = amdgpu_sdma_reset_engine(adev, id); + amdgpu_amdkfd_resume(adev, false); + + return r; } static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) @@ -1679,9 +1739,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_ return -EINVAL; /* Check if this queue is the guilty one */ - adev->sdma.gfx_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, false); + adev->sdma.instance[instance_id].gfx_guilty = + sdma_v4_4_2_is_queue_selected(adev, instance_id, false); if (adev->sdma.has_page_queue) - adev->sdma.page_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, true); + adev->sdma.instance[instance_id].page_guilty = + sdma_v4_4_2_is_queue_selected(adev, instance_id, true); /* Cache the rptr before reset, after the reset, * all of the registers will be reset to 0 @@ -2115,8 +2177,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 4 + 2 * 3 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -2148,8 +2209,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 4 + 2 * 3 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -2347,6 +2407,9 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev) */ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev) { + /* per queue reset not supported for SRIOV */ + if (amdgpu_sriov_vf(adev)) + return; /* * the user queue relies on MEC fw and pmfw when the sdma queue do reset. 
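
The sdma_v4_4_2 hunks above move the guilty bookkeeping from two device-wide flags to per-instance gfx_guilty/page_guilty flags, so each engine's resume path picks its own restart point after a queue reset. A condensed sketch of that selection follows; the helper name is hypothetical, while the fields and the wptr vs. cached_rptr rule mirror sdma_v4_4_2_gfx_resume() and sdma_v4_4_2_page_resume().

/* Illustrative sketch, not part of the patch; assumes the per-instance
 * guilty flags introduced in the hunks above.
 */
static u64 example_pick_restart_rptr(struct amdgpu_device *adev,
				     unsigned int i, bool page_queue)
{
	struct amdgpu_sdma_instance *inst = &adev->sdma.instance[i];
	struct amdgpu_ring *ring = page_queue ? &inst->page : &inst->ring;
	bool guilty = page_queue ? inst->page_guilty : inst->gfx_guilty;

	/* A guilty queue restarts at the current wptr so the offending
	 * commands are skipped; an innocent queue resumes from the rptr
	 * cached before the reset in sdma_v4_4_2_stop_queue().
	 */
	return guilty ? ring->wptr : ring->cached_rptr;
}
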
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 64891f099366..a3b5fda22432 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -151,6 +151,7 @@ struct ta_ras_init_flags { uint16_t xcc_mask; uint8_t channel_dis_num; uint8_t nps_mode; + uint32_t active_umc_mask; }; struct ta_ras_mca_addr { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index ff03436698a4..d716510b8dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -147,10 +147,15 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work) if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) { amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + mutex_lock(&adev->vcn.workload_profile_mutex); + if (adev->vcn.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + adev->vcn.workload_profile_active = false; + } + mutex_unlock(&adev->vcn.workload_profile_mutex); } else { schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); } @@ -164,13 +169,26 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) atomic_inc(&adev->vcn.inst[0].total_submission_cnt); - if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) { + cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); + + /* We can safely return early here because we've cancelled the + * delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true. 
+ */ + if (adev->vcn.workload_profile_active) + goto pg_lock; + + mutex_lock(&adev->vcn.workload_profile_mutex); + if (!adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, true); if (r) dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + adev->vcn.workload_profile_active = true; } + mutex_unlock(&adev->vcn.workload_profile_mutex); +pg_lock: mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h index 8de4ccce5e38..3ca8a417c6d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h +++ b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h @@ -64,6 +64,9 @@ #define HEADER_BARRIER 5 #define SDMA_OP_AQL_COPY 0 #define SDMA_OP_AQL_BARRIER_OR 0 +/* vm invalidation is only available for GC9.4.3/GC9.4.4/GC9.5.0 */ +#define SDMA_OP_VM_INVALIDATE 8 +#define SDMA_SUBOP_VM_INVALIDATE 4 /*define for op field*/ #define SDMA_PKT_HEADER_op_offset 0 @@ -3331,5 +3334,72 @@ #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) +/* +** Definitions for SDMA_PKT_VM_INVALIDATION packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8 +#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift) + +/*define for xcc0_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift 16 +#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift) + +/*define for xcc1_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift 21 +#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift) + +/*define for mmhub_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift 26 +#define SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift) + +/*define for INVALIDATEREQ word*/ +/*define for invalidatereq field*/ +#define 
SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) ((x & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift) + +/*define for ADDRESSRANGELO word*/ +/*define for addressrangelo field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift) + +/*define for ADDRESSRANGEHI word*/ +/*define for invalidateack field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift) + +/*define for addressrangehi field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift) + +/*define for reserved field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift) #endif /* __SDMA_PKT_OPEN_H_ */
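
For reference, a sketch of how the new SDMA_PKT_VM_INVALIDATION_* macros above compose into the four dwords that sdma_v4_4_2_ring_emit_vm_flush() writes to the ring. The packing helper and the u32 pkt[4] buffer are illustrative assumptions; the opcodes, macros, and field values come from this header and the sdma_v4_4_2.c hunk.

/* Illustrative sketch, not part of the patch: pack the VM_INVALIDATION
 * packet, mirroring the amdgpu_ring_write() sequence in
 * sdma_v4_4_2_ring_emit_vm_flush().
 */
static void example_pack_vm_invalidation(u32 pkt[4], u32 req,
					 unsigned int mmhub_eng,
					 unsigned int vmid)
{
	/* HEADER: op/sub_op select the VM invalidation packet; 0x1f for the
	 * XCC engine ids and the MMHUB engine id match what the driver hunk
	 * programs.
	 */
	pkt[0] = SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) |
		 SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) |
		 SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) |
		 SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) |
		 SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(mmhub_eng);
	/* INVALIDATEREQ: the VM_INVALIDATE_ENG0_REQ value built by
	 * sdma_v4_4_2_get_invalidate_req().
	 */
	pkt[1] = SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req);
	/* ADDRESSRANGELO: written as 0 in the driver hunk. */
	pkt[2] = 0;
	/* ADDRESSRANGEHI: INVALIDATEACK carries BIT(vmid), as in the hunk. */
	pkt[3] = SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid));
}
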