diff options
| author | Gangliang Xie <ganglxie@amd.com> | 2025-09-04 18:04:33 +0800 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2025-11-06 09:55:58 -0500 |
| commit | 9ce015e5fd025a23e357bbbe6602c1e7cb4b89e0 (patch) | |
| tree | 3ed8f8eb6ee1971c2ebeb3f180988dc6da61741f /drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | |
| parent | f903b85ed0f14fc412d8a781d3fcc0c023dfcd7c (diff) | |
drm/amdgpu: adapt reset function for pmfw eeprom
adapt reset function for pmfw eeprom
Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 61 |
1 files changed, 36 insertions, 25 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 258ff0f121a2..e9c5781e4376 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -444,40 +444,51 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + u32 erase_res = 0; u8 csum; int res; mutex_lock(&control->ras_tbl_mutex); - hdr->header = RAS_TABLE_HDR_VAL; - amdgpu_ras_set_eeprom_table_version(control); + if (!amdgpu_ras_smu_eeprom_supported(adev)) { + hdr->header = RAS_TABLE_HDR_VAL; + amdgpu_ras_set_eeprom_table_version(control); - if (hdr->version >= RAS_TABLE_VER_V2_1) { - hdr->first_rec_offset = RAS_RECORD_START_V2_1; - hdr->tbl_size = RAS_TABLE_HEADER_SIZE + - RAS_TABLE_V2_1_INFO_SIZE; - rai->rma_status = GPU_HEALTH_USABLE; - /** - * GPU health represented as a percentage. - * 0 means worst health, 100 means fully health. - */ - rai->health_percent = 100; - /* ecc_page_threshold = 0 means disable bad page retirement */ - rai->ecc_page_threshold = con->bad_page_cnt_threshold; + if (hdr->version >= RAS_TABLE_VER_V2_1) { + hdr->first_rec_offset = RAS_RECORD_START_V2_1; + hdr->tbl_size = RAS_TABLE_HEADER_SIZE + + RAS_TABLE_V2_1_INFO_SIZE; + rai->rma_status = GPU_HEALTH_USABLE; + /** + * GPU health represented as a percentage. + * 0 means worst health, 100 means fully health. + */ + rai->health_percent = 100; + /* ecc_page_threshold = 0 means disable bad page retirement */ + rai->ecc_page_threshold = con->bad_page_cnt_threshold; + } else { + hdr->first_rec_offset = RAS_RECORD_START; + hdr->tbl_size = RAS_TABLE_HEADER_SIZE; + } + + csum = __calc_hdr_byte_sum(control); + if (hdr->version >= RAS_TABLE_VER_V2_1) + csum += __calc_ras_info_byte_sum(control); + csum = -csum; + hdr->checksum = csum; + res = __write_table_header(control); + if (!res && hdr->version > RAS_TABLE_VER_V1) + res = __write_table_ras_info(control); } else { - hdr->first_rec_offset = RAS_RECORD_START; - hdr->tbl_size = RAS_TABLE_HEADER_SIZE; + res = amdgpu_ras_smu_erase_ras_table(adev, &erase_res); + if (res || erase_res) { + dev_warn(adev->dev, "RAS EEPROM reset failed, res:%d result:%d", + res, erase_res); + if (!res) + res = -EIO; + } } - csum = __calc_hdr_byte_sum(control); - if (hdr->version >= RAS_TABLE_VER_V2_1) - csum += __calc_ras_info_byte_sum(control); - csum = -csum; - hdr->checksum = csum; - res = __write_table_header(control); - if (!res && hdr->version > RAS_TABLE_VER_V1) - res = __write_table_ras_info(control); - control->ras_num_recs = 0; control->ras_num_bad_pages = 0; control->ras_num_mca_recs = 0; |