From d25e3a610bae03bffc5c14b5d944a5d0cd844678 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 22 Oct 2025 08:34:03 +0200 Subject: drm/sched: Fix race in drm_sched_entity_select_rq() In a past bug fix it was forgotten that entity access must be protected by the entity lock. That's a data race and potentially UB. Move the spin_unlock() to the appropriate position. Cc: stable@vger.kernel.org # v5.13+ Fixes: ac4eb83ab255 ("drm/sched: select new rq even if there is only one v3") Reviewed-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251022063402.87318-2-phasta@kernel.org --- drivers/gpu/drm/scheduler/sched_entity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/scheduler/sched_entity.c') diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 5a4697f636f2..aa222166de58 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -552,10 +552,11 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; } - spin_unlock(&entity->lock); if (entity->num_sched_list == 1) entity->sched_list = NULL; + + spin_unlock(&entity->lock); } /** -- cgit v1.2.3 From 9e8b3201c7302d5b522ba3535630bed21cc03c27 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Wed, 15 Oct 2025 16:01:28 +0200 Subject: drm/sched: avoid killing parent entity on child SIGKILL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DRM scheduler tracks who last uses an entity and when that process is killed blocks all further submissions to that entity. The problem is that we didn't track who initially created an entity, so when a process accidently leaked its file descriptor to a child and that child got killed, we killed the parent's entities. Avoid that and instead initialize the entities last user on entity creation. This also allows to drop the extra NULL check. Signed-off-by: David Rosca Signed-off-by: Christian König Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4568 Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Acked-by: Philipp Stanner Link: https://lore.kernel.org/r/20251015140128.1470-1-christian.koenig@amd.com Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251015140128.1470-1-christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_entity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/scheduler/sched_entity.c') diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index aa222166de58..c8e949f4a568 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -70,6 +70,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; + entity->last_user = current->group_leader; /* * It's perfectly valid to initialize an entity without having a valid * scheduler attached. It's just not valid to use the scheduler before it @@ -302,7 +303,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) /* For a killed process disallow further enqueueing of jobs. */ last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); - if ((!last_user || last_user == current->group_leader) && + if (last_user == current->group_leader && (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) drm_sched_entity_kill(entity); -- cgit v1.2.3