summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorYiPeng Chai <YiPeng.Chai@amd.com>2025-10-28 16:18:31 +0800
committerAlex Deucher <alexander.deucher@amd.com>2025-11-04 11:53:59 -0500
commitd95ca7f515cfd2e721de07e86aa79adb17575a52 (patch)
tree726550907388bb73193175e1da23f00e7e8a80ea /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parentd4432f16d3393180e8f0b344b21839e553f7938b (diff)
drm/amdgpu: suspend ras module before gpu reset
During gpu reset, all GPU-related resources are inaccessible. To avoid affecting ras functionality, suspend ras module before gpu reset and resume it after gpu reset is complete. V2: Rename functions to avoid misunderstanding. V3: Move flush_delayed_work to amdgpu_ras_process_pause, Move schedule_delayed_work to amdgpu_ras_process_unpause. V4: Rename functions. V5: Move the function to amdgpu_ras.c. Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Acked-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c5
1 files changed, 5 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 95f7ae36e4f1..dcf6fce1c5a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -71,6 +71,7 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
@@ -6660,6 +6661,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
goto end_reset;
}
+ /* Cannot be called after locking reset domain */
+ amdgpu_ras_pre_reset(adev, &device_list);
+
/* We need to lock reset domain only once both for XGMI and single device */
amdgpu_device_recovery_get_reset_lock(adev, &device_list);
@@ -6691,6 +6695,7 @@ skip_sched_resume:
reset_unlock:
amdgpu_device_recovery_put_reset_lock(adev, &device_list);
end_reset:
+ amdgpu_ras_post_reset(adev, &device_list);
if (hive) {
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);