diff options
| author | YiPeng Chai <YiPeng.Chai@amd.com> | 2025-07-21 15:14:03 +0800 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2025-10-20 18:18:35 -0400 |
| commit | 408bd841ad24fd9891a1cd9ca11cf4f48d9667dc (patch) | |
| tree | 9fb77c5f5f04b1f54fbdb3143c121a78f54c4b66 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
| parent | 3d72d2e5f4c47d88f292c915ce31ef14092df3f2 (diff) | |
drm/amdgpu: Improve ras fatal error handling function
In multi-gpu case, a fatal error will generate several
fatal error interrupts. After improving this function,
the ras module can reuse this function to only
handle the first interrupt.
V3:
Initialize event_id using RAS_EVENT_INVALID_ID.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9aa4b93ac6af..9e632adc7746 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -4650,20 +4650,18 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type return id; } -void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) +int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); enum ras_event_type type = RAS_EVENT_TYPE_FATAL; - u64 event_id; + u64 event_id = RAS_EVENT_INVALID_ID; - if (amdgpu_ras_mark_ras_event(adev, type)) { - dev_err(adev->dev, - "uncorrectable hardware error (ERREVENT_ATHUB_INTERRUPT) detected!\n"); - return; - } + if (amdgpu_uniras_enabled(adev)) + return 0; - event_id = amdgpu_ras_acquire_event_id(adev, type); + if (!amdgpu_ras_mark_ras_event(adev, type)) + event_id = amdgpu_ras_acquire_event_id(adev, type); RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error" "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); @@ -4672,6 +4670,8 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; amdgpu_ras_reset_gpu(adev); } + + return -EBUSY; } bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev) |