summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h5
3 files changed, 38 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 95f7ae36e4f1..dcf6fce1c5a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -71,6 +71,7 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
@@ -6660,6 +6661,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
goto end_reset;
}
+ /* Cannot be called after locking reset domain */
+ amdgpu_ras_pre_reset(adev, &device_list);
+
/* We need to lock reset domain only once both for XGMI and single device */
amdgpu_device_recovery_get_reset_lock(adev, &device_list);
@@ -6691,6 +6695,7 @@ skip_sched_resume:
reset_unlock:
amdgpu_device_recovery_put_reset_lock(adev, &device_list);
end_reset:
+ amdgpu_ras_post_reset(adev, &device_list);
if (hive) {
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 37999b367957..62d2f988d88f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2921,8 +2921,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
type = amdgpu_ras_get_fatal_error_event(adev);
list_for_each_entry(remote_adev,
device_list_handle, gmc.xgmi.head) {
- amdgpu_ras_query_err_status(remote_adev);
- amdgpu_ras_log_on_err_counter(remote_adev, type);
+ if (amdgpu_uniras_enabled(remote_adev)) {
+ amdgpu_ras_mgr_update_ras_ecc(remote_adev);
+ } else {
+ amdgpu_ras_query_err_status(remote_adev);
+ amdgpu_ras_log_on_err_counter(remote_adev, type);
+ }
}
}
@@ -5673,3 +5677,25 @@ bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr
return ret;
}
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (amdgpu_uniras_enabled(tmp_adev))
+ amdgpu_ras_mgr_pre_reset(tmp_adev);
+ }
+}
+
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (amdgpu_uniras_enabled(tmp_adev))
+ amdgpu_ras_mgr_post_reset(tmp_adev);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 674bcd3c814c..ff44190d7d98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -1039,4 +1039,9 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
const char *fmt, ...);
bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
#endif