summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2025-12-03 09:43:09 +1000
committerDave Airlie <airlied@redhat.com>2025-12-03 09:43:49 +1000
commit0692602defb0c273f80dec9c564ca50726404aca (patch)
tree2fea4ab096be220f377eab9934c39790cb453c83 /drivers/gpu/drm/amd/amdgpu
parentb3239df349c2c2c94686674489c9629c89ca49a1 (diff)
parent3925683515e93844be204381d2d5a1df5de34f31 (diff)
Merge tag 'amd-drm-next-6.19-2025-12-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.19-2025-12-02: amdgpu: - Unified MES fix - SMU 11 unbalanced irq fix - Fix for driver reloading on APUs - pp_table sysfs fix - Fix memory leak in fence handling - HDMI fix - DC cursor fixes - eDP panel parsing fix - Brightness fix - DC analog fixes - EDID retry fixes - UserQ fixes - RAS fixes - IP discovery fix - Add missing locking in amdgpu_ttm_access_memory_sdma() - Smart Power OLED fix - PRT and page fault fixes for GC 6-8 - VMID reservation fix - ACP platform device fix - Add missing vm fault handling for GC 11-12 - VPE fix amdkfd: - Partitioning fix Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patch.msgid.link/20251202220101.2039347-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c4
26 files changed, 228 insertions, 28 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 4926996f94da..381ef205b0df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -302,17 +302,19 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block)
adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
adev->acp.acp_cell[0].num_resources = 3;
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
adev->acp.acp_cell[1].num_resources = 1;
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
- r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2);
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, 2, NULL, 0, NULL);
if (r)
goto failure;
r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
@@ -410,30 +412,34 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block)
adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
adev->acp.acp_cell[0].num_resources = 5;
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
adev->acp.acp_cell[1].num_resources = 1;
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
adev->acp.acp_cell[2].name = "designware-i2s";
+ adev->acp.acp_cell[2].id = 2;
adev->acp.acp_cell[2].num_resources = 1;
adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
adev->acp.acp_cell[3].name = "designware-i2s";
+ adev->acp.acp_cell[3].id = 3;
adev->acp.acp_cell[3].num_resources = 1;
adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
- r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS);
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, ACP_DEVS, NULL, 0, NULL);
if (r)
goto failure;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bfbf874a1000..903c4706040d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2665,6 +2665,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
chip_name = "navi12";
break;
case CHIP_CYAN_SKILLFISH:
+ if (adev->discovery.bin)
+ return 0;
chip_name = "cyan_skillfish";
break;
}
@@ -3680,6 +3682,20 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
"failed to release exclusive mode on fini\n");
}
+ /*
+ * Driver reload on the APU can fail due to firmware validation because
+ * the PSP is always running, as it is shared across the whole SoC.
+ * This same issue does not occur on dGPU because it has a mechanism
+ * that checks whether the PSP is running. A solution for those issues
+ * in the APU is to trigger a GPU reset, but this should be done during
+ * the unload phase to avoid adding boot latency and screen flicker.
+ */
+ if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
+ r = amdgpu_asic_reset(adev);
+ if (r)
+ dev_err(adev->dev, "asic reset on %s failed\n", __func__);
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 9dcf51991b5b..869bceb0fe2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -597,6 +597,9 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
/* reserve engine 5 for firmware */
if (adev->enable_mes)
vm_inv_engs[i] &= ~(1 << 5);
+ /* reserve engine 6 for uni mes */
+ if (adev->enable_uni_mes)
+ vm_inv_engs[i] &= ~(1 << 6);
/* reserve mmhub engine 3 for firmware */
if (adev->enable_umsch_mm)
vm_inv_engs[i] &= ~(1 << 3);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 55097ca10738..727342689d4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -86,6 +86,11 @@ enum amdgpu_memory_partition {
#define AMDGPU_MAX_MEM_RANGES 8
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY 0x80
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_READ 0x40
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE 0x20
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_EXE 0x10
+
/*
* GMC page fault information
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 7d8ef7ae10c2..0a0dcbf0798d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -224,6 +224,7 @@ err_fence:
kfree((*job)->hw_fence);
err_job:
kfree(*job);
+ *job = NULL;
return r;
}
@@ -245,7 +246,10 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
if (r) {
if (entity)
drm_sched_job_cleanup(&(*job)->base);
+ kfree((*job)->hw_vm_fence);
+ kfree((*job)->hw_fence);
kfree(*job);
+ *job = NULL;
}
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9e2e098af86c..2a6cf7963dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -150,6 +150,8 @@ static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev);
#ifdef CONFIG_X86_MCE_AMD
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+static void
+amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev);
struct mce_notifier_adev_list {
struct amdgpu_device *devs[MAX_GPU_INSTANCE];
int num_gpu;
@@ -3954,7 +3956,9 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
mutex_unlock(&con->recovery_lock);
amdgpu_ras_critical_region_init(adev);
-
+#ifdef CONFIG_X86_MCE_AMD
+ amdgpu_unregister_bad_pages_mca_notifier(adev);
+#endif
return 0;
}
/* recovery end */
@@ -4988,6 +4992,28 @@ static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
notifier_registered = true;
}
}
+static void amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ if (!notifier_registered && !mce_adev_list.num_gpu)
+ return;
+ for (i = 0, j = 0; i < mce_adev_list.num_gpu; i++) {
+ if (mce_adev_list.devs[i] == adev)
+ mce_adev_list.devs[i] = NULL;
+ if (!mce_adev_list.devs[i])
+ ++j;
+ }
+
+ if (j == mce_adev_list.num_gpu) {
+ mce_adev_list.num_gpu = 0;
+ /* Unregister x86 notifier with MCE subsystem. */
+ if (notifier_registered) {
+ mce_unregister_decode_chain(&amdgpu_bad_page_nb);
+ notifier_registered = false;
+ }
+ }
+}
#endif
struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index cd8873c6931a..c596b6df2e2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -520,9 +520,14 @@ static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
return -ENOMEM;
if (!(*offset)) {
+ /* Need at least 12 bytes for the header on the first read */
+ if (size < ring_header_size)
+ return -EINVAL;
+
if (copy_to_user(buf, ring_header, ring_header_size))
return -EFAULT;
buf += ring_header_size;
+ size -= ring_header_size;
}
r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 421c2bbe2b6a..2b931e855abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1329,7 +1329,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
flags |= AMDGPU_PTE_SYSTEM;
- if (ttm->caching == ttm_cached)
+ if (ttm && ttm->caching == ttm_cached)
flags |= AMDGPU_PTE_SNOOPED;
}
@@ -1486,6 +1486,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
if (r)
goto out;
+ mutex_lock(&adev->mman.gtt_window_lock);
amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
src_mm.start;
@@ -1500,6 +1501,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
WARN_ON(job->ibs[0].length_dw > num_dw);
fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
r = -ETIMEDOUT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7fc081e88b6a..a67285118c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1069,7 +1069,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
}
/* Prepare a TLB flush fence to be attached to PTs */
- if (!params->unlocked && vm->is_compute_context) {
+ if (!params->unlocked) {
amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
/* Makes sure no PD/PT is freed before the flush */
@@ -2093,7 +2093,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo *bo = before->bo_va->base.bo;
amdgpu_vm_it_insert(before, &vm->va);
- if (before->flags & AMDGPU_PTE_PRT_FLAG(adev))
+ if (before->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
@@ -2108,7 +2108,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo *bo = after->bo_va->base.bo;
amdgpu_vm_it_insert(after, &vm->va);
- if (after->flags & AMDGPU_PTE_PRT_FLAG(adev))
+ if (after->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
@@ -2916,8 +2916,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* We only have requirement to reserve vmid from gfxhub */
- amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
- break;
+ return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
case AMDGPU_VM_OP_UNRESERVE_VMID:
amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 41f4705bdbbd..876a3256dba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -156,6 +156,9 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
/* enable irqs */
cik_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -192,6 +195,9 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
/* When a ring buffer overflow happen start parsing interrupt
@@ -211,6 +217,8 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
}
+
+out:
return (wptr & ih->ptr_mask);
}
@@ -306,6 +314,10 @@ static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 2f891fb846d5..bc7a2e06ab5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -157,6 +157,9 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
cz_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -194,6 +197,9 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -297,6 +303,10 @@ static int cz_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 02d7cfae22bd..8a2ee2de390f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5874,9 +5874,9 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
if (flags & AMDGPU_IB_PREEMPTED)
control |= INDIRECT_BUFFER_PRE_RESUME(1);
- if (vmid)
+ if (vmid && !ring->adev->gfx.rs64_enable)
gfx_v11_0_ring_emit_de_meta(ring,
- (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
+ !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
}
amdgpu_ring_write(ring, header);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d7499be8c4bf..ce6e04242c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -103,8 +103,10 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
uint32_t vmhub_index = entry->client_id == SOC15_IH_CLIENTID_VMC ?
AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
- bool retry_fault = !!(entry->src_data[1] & 0x80);
- bool write_fault = !!(entry->src_data[1] & 0x20);
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
struct amdgpu_task_info *task_info;
uint32_t status = 0;
u64 addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 7bc389d9f5c4..ba59ee8e398a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -103,12 +103,41 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
uint32_t vmhub_index = entry->client_id == SOC21_IH_CLIENTID_VMC ?
AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
uint32_t status = 0;
u64 addr;
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
+ return 1;
+ }
+
if (!amdgpu_sriov_vf(adev)) {
/*
* Issue a dummy read to wait for the status register to
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index cad2d19105c4..7a9d6894e321 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -91,6 +91,10 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
struct amdgpu_vmhub *hub;
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
uint32_t status = 0;
u64 addr;
@@ -102,6 +106,31 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
else
hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
+ return 1;
+ }
+
if (!amdgpu_sriov_vf(adev)) {
/*
* Issue a dummy read to wait for the status register to
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 499dfd78092d..a8ec95f42926 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -610,23 +610,21 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
}
static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
- u32 status, u32 addr, u32 mc_client)
+ u32 status, u32 addr)
{
u32 mc_id;
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
- char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
- (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
- dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from %d\n",
protections, vmid, addr,
REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_RW) ?
- "write" : "read", block, mc_client, mc_id);
+ "write" : "read", mc_id);
}
static const u32 mc_cg_registers[] = {
@@ -1072,6 +1070,12 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status;
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1);
@@ -1079,6 +1083,10 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT,
+ status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v6_0_set_fault_enable_default(adev, false);
@@ -1089,7 +1097,7 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
- gmc_v6_0_vm_decode_fault(adev, status, addr, 0);
+ gmc_v6_0_vm_decode_fault(adev, status, addr);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 0e5e54d0a9a5..fbd0bf147f50 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1261,6 +1261,12 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status, mc_client, vmid;
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index e1509480dfc2..6551b60f2584 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1439,6 +1439,12 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+ /* Delegate to the soft IRQ handler ring */
+ if (adev->irq.ih_soft.enabled && entry->ih != &adev->irq.ih_soft) {
+ amdgpu_irq_delegate(adev, entry, 4);
+ return 1;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e716097dfde4..8ad7519f7b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -544,8 +544,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- bool retry_fault = !!(entry->src_data[1] & 0x80);
- bool write_fault = !!(entry->src_data[1] & 0x20);
+ bool retry_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+ bool write_fault = !!(entry->src_data[1] &
+ AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
uint32_t status = 0, cid = 0, rw = 0, fed = 0;
struct amdgpu_task_info *task_info;
struct amdgpu_vmhub *hub;
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index 1317ede131b6..01cadf898c00 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -157,6 +157,9 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
iceland_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -194,6 +197,9 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -296,6 +302,10 @@ static int iceland_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
r = amdgpu_irq_init(adev);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 0ceeb19df2e5..217040044987 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1390,7 +1390,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
break;
case IP_VERSION(6, 0, 3):
- if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq)
+ if (adev->sdma.instance[0].fw_version >= 29 && !adev->sdma.disable_uq)
adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
break;
case IP_VERSION(6, 1, 0):
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 1df00f8a2406..66f650f87243 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -96,6 +96,9 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
pci_set_master(adev->pdev);
si_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -112,6 +115,9 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
@@ -127,6 +133,8 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(IH_RB_CNTL, tmp);
}
+
+out:
return (wptr & ih->ptr_mask);
}
@@ -175,6 +183,10 @@ static int si_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
return amdgpu_irq_init(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 9785fada4fa7..42f5d9c0e3af 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -853,10 +853,6 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
{
u32 sol_reg;
- /* CP hangs in IGT reloading test on RN, reset to WA */
- if (adev->asic_type == CHIP_RENOIR)
- return true;
-
if (amdgpu_gmc_need_reset_on_init(adev))
return true;
if (amdgpu_psp_tos_reload_needed(adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 7d17ae56f901..ee8038df17e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -159,6 +159,9 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
tonga_ih_enable_interrupts(adev);
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
return 0;
}
@@ -196,6 +199,9 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
wptr = le32_to_cpu(*ih->wptr_cpu);
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -306,6 +312,10 @@ static int tonga_ih_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index eacf4e93ba2f..cb7123ec1a5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -141,7 +141,7 @@ static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block)
adev->vcn.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
- if (amdgpu_dpm_reset_vcn_is_supported(adev))
+ if (amdgpu_dpm_reset_vcn_is_supported(adev) && !amdgpu_sriov_vf(adev))
adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index 714350cabf2f..8bd457dea4cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -122,7 +122,9 @@ static int vcn_v5_0_1_late_init(struct amdgpu_ip_block *ip_block)
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 12):
- if ((adev->psp.sos.fw_version >= 0x00450025) && amdgpu_dpm_reset_vcn_is_supported(adev))
+ if ((adev->psp.sos.fw_version >= 0x00450025) &&
+ amdgpu_dpm_reset_vcn_is_supported(adev) &&
+ !amdgpu_sriov_vf(adev))
adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default: