diff options
| author | Dave Airlie <airlied@redhat.com> | 2025-12-03 09:43:09 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2025-12-03 09:43:49 +1000 |
| commit | 0692602defb0c273f80dec9c564ca50726404aca (patch) | |
| tree | 2fea4ab096be220f377eab9934c39790cb453c83 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
| parent | b3239df349c2c2c94686674489c9629c89ca49a1 (diff) | |
| parent | 3925683515e93844be204381d2d5a1df5de34f31 (diff) | |
Merge tag 'amd-drm-next-6.19-2025-12-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.19-2025-12-02:
amdgpu:
- Unified MES fix
- SMU 11 unbalanced irq fix
- Fix for driver reloading on APUs
- pp_table sysfs fix
- Fix memory leak in fence handling
- HDMI fix
- DC cursor fixes
- eDP panel parsing fix
- Brightness fix
- DC analog fixes
- EDID retry fixes
- UserQ fixes
- RAS fixes
- IP discovery fix
- Add missing locking in amdgpu_ttm_access_memory_sdma()
- Smart Power OLED fix
- PRT and page fault fixes for GC 6-8
- VMID reservation fix
- ACP platform device fix
- Add missing vm fault handling for GC 11-12
- VPE fix
amdkfd:
- Partitioning fix
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20251202220101.2039347-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 28 |
1 files changed, 27 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9e2e098af86c..2a6cf7963dde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -150,6 +150,8 @@ static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev); #ifdef CONFIG_X86_MCE_AMD static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev); +static void +amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev); struct mce_notifier_adev_list { struct amdgpu_device *devs[MAX_GPU_INSTANCE]; int num_gpu; @@ -3954,7 +3956,9 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) mutex_unlock(&con->recovery_lock); amdgpu_ras_critical_region_init(adev); - +#ifdef CONFIG_X86_MCE_AMD + amdgpu_unregister_bad_pages_mca_notifier(adev); +#endif return 0; } /* recovery end */ @@ -4988,6 +4992,28 @@ static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev) notifier_registered = true; } } +static void amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev) +{ + int i, j; + + if (!notifier_registered && !mce_adev_list.num_gpu) + return; + for (i = 0, j = 0; i < mce_adev_list.num_gpu; i++) { + if (mce_adev_list.devs[i] == adev) + mce_adev_list.devs[i] = NULL; + if (!mce_adev_list.devs[i]) + ++j; + } + + if (j == mce_adev_list.num_gpu) { + mce_adev_list.num_gpu = 0; + /* Unregister x86 notifier with MCE subsystem. */ + if (notifier_registered) { + mce_unregister_decode_chain(&amdgpu_bad_page_nb); + notifier_registered = false; + } + } +} #endif struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev) |