drm/amdgpu/vce1: Ensure VCPU BO is in lower 32-bit address space (v3)

Based on research and ideas by Alexandre and Christian.

VCE1 actually executes its code from the VCPU BO. Due to various hardware limitations, the VCE1 requires the VCPU BO to be in the low 32 bit address range. However, VRAM is typically mapped at the high address range, which means the VCPU can't access VRAM through the FB aperture.

To solve this, we write a few page table entries to map the VCPU BO in the GART address range. And we make sure that the GART is located at the low address range. That way the VCE1 can access the VCPU BO.

v2:
- Adjust to v2 of the GART helper commit.
- Add empty line to multi-line comment.

v3:
- Instead of relying on gmc_v6 to set the GART space before GTT, add a new function amdgpu_vce_required_gart_pages() which is called from amdgpu_gtt_mgr_init() directly.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Co-developed-by: Alexandre Demers <alexandre.f.demers@gmail.com>
Signed-off-by: Alexandre Demers <alexandre.f.demers@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Timur Kristóf <timur.kristof@gmail.com> 2025-11-07 16:57:42 +0100
committer: Alex Deucher <alexander.deucher@amd.com> 2025-11-11 21:54:18 -0500
commit: 221cadb9c6bc2e179a717aac706dbbc9b3377acc (patch)
tree: 362e41cc46ded1781e39d6327ab246fd33787b80 /drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
parent: baf75a087c41eeb03c471099dc5d77e3b068c33b (diff)
1 files changed, 55 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
index bf9f943852cb..9ae424618556 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
@@ -34,6 +34,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_vce.h"
+#include "amdgpu_gart.h"
 #include "sid.h"
 #include "vce_v1_0.h"
 #include "vce/vce_1_0_d.h"
@@ -46,6 +47,11 @@
 #define VCE_V1_0_DATA_SIZE	(7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
 
+#define VCE_V1_0_GART_PAGE_START \
+	(AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)
+#define VCE_V1_0_GART_ADDR_START \
+	(VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)
+
 static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
 static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
 
@@ -513,6 +519,49 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
 	return 0;
 }
 
+/**
+ * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Due to various hardware limitations, the VCE1 requires
+ * the VCPU BO to be in the low 32 bit address range.
+ *
+ * To accommodate that, we put GART to the LOW address range
+ * and reserve some GART pages where we map the VCPU BO,
+ * so that it gets a 32-bit address.
+ *
+ * Return: 0 on success, -EINVAL if no 32-bit address is possible.
+ */
+static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
+{
+	u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo);
+	u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
+	u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
+	u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
+	u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
+	u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
+	u64 gart_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START;
+
+	/*
+	 * Check if the VCPU BO already has a 32-bit address.
+	 * Eg. if MC is configured to put VRAM in the low address range.
+	 */
+	if (gpu_addr <= max_vcpu_bo_addr)
+		return 0;
+
+	/* Check if we can map the VCPU BO in GART to a 32-bit address. */
+	if (gart_addr > max_vcpu_bo_addr)
+		return -EINVAL;
+
+	/* Map the BO's VRAM pages into the reserved GART pages. */
+	amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
+				   num_pages, flags, adev->gart.ptr);
+	adev->vce.gpu_addr = gart_addr;
+
+	return 0;
+}
+
 static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
@@ -534,6 +583,9 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
 	r = vce_v1_0_load_fw_signature(adev);
 	if (r)
 		return r;
+	r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
+	if (r)
+		return r;
 
 	for (i = 0; i < adev->vce.num_rings; i++) {
 		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
@@ -649,6 +701,9 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
 	r = vce_v1_0_load_fw_signature(adev);
 	if (r)
 		return r;
+	r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
+	if (r)
+		return r;
 
 	return vce_v1_0_hw_init(ip_block);
 }
author	Timur Kristóf <timur.kristof@gmail.com>	2025-11-07 16:57:42 +0100
committer	Alex Deucher <alexander.deucher@amd.com>	2025-11-11 21:54:18 -0500
commit	221cadb9c6bc2e179a717aac706dbbc9b3377acc (patch)
tree	362e41cc46ded1781e39d6327ab246fd33787b80 /drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
parent	baf75a087c41eeb03c471099dc5d77e3b068c33b (diff)