drm/amdgpu: clean up and unify hw fence handling

Decouple the amdgpu fence from the amdgpu_job structure. This lets us clean up the separate fence ops for the embedded fence and other fences. This also allows us to allocate the vm fence up front when we allocate the job. v2: Additional cleanup suggested by Christian v3: Additional cleanups suggested by Christian v4: Additional cleanups suggested by David and vm fence fix v5: cast seqno (David) Cc: David.Wu3@amd.com Cc: christian.koenig@amd.com Tested-by: David (Ming Qiang) Wu <David.Wu3@amd.com> Reviewed-by: David (Ming Qiang) Wu <David.Wu3@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Alex Deucher <alexander.deucher@amd.com> 2025-08-27 11:34:14 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2025-10-13 14:14:35 -0400
commit: db36632ea51e8b02dd637d291c755899b20a5a31 (patch)
tree: 403dc26146ef2bad5f69b899d11c683165d55f68 /drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
parent: 3cd7ceee9a17a0db7f8e3bf260a3762bd4586ac1 (diff)
1 files changed, 30 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d020a890a0ea..e08d837668f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -137,7 +137,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 		   ring->funcs->reset) {
 		dev_err(adev->dev, "Starting %s ring reset\n",
 			s_job->sched->name);
-		r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
+		r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
 		if (!r) {
 			atomic_inc(&ring->adev->gpu_reset_counter);
 			dev_err(adev->dev, "Ring %s reset succeeded\n",
@@ -186,6 +186,9 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		     unsigned int num_ibs, struct amdgpu_job **job,
 		     u64 drm_client_id)
 {
+	struct amdgpu_fence *af;
+	int r;
+
 	if (num_ibs == 0)
 		return -EINVAL;
 
@@ -193,6 +196,20 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (!*job)
 		return -ENOMEM;
 
+	af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+	if (!af) {
+		r = -ENOMEM;
+		goto err_job;
+	}
+	(*job)->hw_fence = af;
+
+	af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+	if (!af) {
+		r = -ENOMEM;
+		goto err_fence;
+	}
+	(*job)->hw_vm_fence = af;
+
 	(*job)->vm = vm;
 
 	amdgpu_sync_create(&(*job)->explicit_sync);
@@ -204,6 +221,13 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	return drm_sched_job_init(&(*job)->base, entity, 1, owner,
 				  drm_client_id);
+
+err_fence:
+	kfree((*job)->hw_fence);
+err_job:
+	kfree(*job);
+
+	return r;
 }
 
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
@@ -251,11 +275,11 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
 	struct dma_fence *f;
 	unsigned i;
 
-	/* Check if any fences where initialized */
+	/* Check if any fences were initialized */
 	if (job->base.s_fence && job->base.s_fence->finished.ops)
 		f = &job->base.s_fence->finished;
-	else if (job->hw_fence.base.ops)
-		f = &job->hw_fence.base;
+	else if (job->hw_fence && job->hw_fence->base.ops)
+		f = &job->hw_fence->base;
 	else
 		f = NULL;
 
@@ -271,11 +295,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
-	/* only put the hw fence if has embedded fence */
-	if (!job->hw_fence.base.ops)
-		kfree(job);
-	else
-		dma_fence_put(&job->hw_fence.base);
+	kfree(job);
 }
 
 void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -304,10 +324,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
-	if (!job->hw_fence.base.ops)
-		kfree(job);
-	else
-		dma_fence_put(&job->hw_fence.base);
+	kfree(job);
 }
 
 struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
author	Alex Deucher <alexander.deucher@amd.com>	2025-08-27 11:34:14 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2025-10-13 14:14:35 -0400
commit	db36632ea51e8b02dd637d291c755899b20a5a31 (patch)
tree	403dc26146ef2bad5f69b899d11c683165d55f68 /drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
parent	3cd7ceee9a17a0db7f8e3bf260a3762bd4586ac1 (diff)