diff options
| author | Thomas Zimmermann <tzimmermann@suse.de> | 2025-09-15 09:57:29 +0200 |
|---|---|---|
| committer | Thomas Zimmermann <tzimmermann@suse.de> | 2025-09-15 09:57:29 +0200 |
| commit | f7d9c6a7f288328c32c45d9c8ac605b64f9261fd (patch) | |
| tree | e4a09de70689bfedb3ac474d7833b4a5ff61c7d1 /drivers/gpu/drm/amd/pm | |
| parent | c08c931060c7e44452e635e115913dd88214848c (diff) | |
| parent | 0d9f0083f7a5a31d91d501467b499bb8c4b25bdf (diff) | |
Merge drm/drm-next into drm-misc-next
Backmerging to drm-misc-next to get fixes from v6.17-rc6.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/gpu/drm/amd/pm')
29 files changed, 1085 insertions, 127 deletions
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 71d986dd7a6e..518d07afc7df 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -764,10 +764,6 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev) ret = smu_send_rma_reason(smu); mutex_unlock(&adev->pm.mutex); - if (adev->cper.enabled) - if (amdgpu_cper_generate_bp_threshold_record(adev)) - dev_warn(adev->dev, "fail to generate bad page threshold cper records\n"); - return ret; } @@ -824,6 +820,21 @@ int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask) return ret; } +bool amdgpu_dpm_reset_vcn_is_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + bool ret; + + if (!is_support_sw_smu(adev)) + return false; + + mutex_lock(&adev->pm.mutex); + ret = smu_reset_vcn_is_supported(smu); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, @@ -2038,6 +2049,66 @@ int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev, } /** + * amdgpu_dpm_get_temp_metrics - Retrieve metrics for a specific compute + * partition + * @adev: Pointer to the device. + * @type: Identifier for the temperature type metrics to be fetched. + * @table: Pointer to a buffer where the metrics will be stored. If NULL, the + * function returns the size of the metrics structure. + * + * This function retrieves metrics for a specific temperature type, If the + * table parameter is NULL, the function returns the size of the metrics + * structure without populating it. + * + * Return: Size of the metrics structure on success, or a negative error code on failure. + */ +ssize_t amdgpu_dpm_get_temp_metrics(struct amdgpu_device *adev, + enum smu_temp_metric_type type, void *table) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret; + + if (!pp_funcs->get_temp_metrics || + !amdgpu_dpm_is_temp_metrics_supported(adev, type)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->get_temp_metrics(adev->powerplay.pp_handle, type, table); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +/** + * amdgpu_dpm_is_temp_metrics_supported - Return if specific temperature metrics support + * is available + * @adev: Pointer to the device. + * @type: Identifier for the temperature type metrics to be fetched. + * + * This function returns metrics if specific temperature metrics type is supported or not. + * + * Return: True in case of metrics type supported else false. + */ +bool amdgpu_dpm_is_temp_metrics_supported(struct amdgpu_device *adev, + enum smu_temp_metric_type type) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + bool support_temp_metrics = false; + + if (!pp_funcs->temp_metrics_is_supported) + return support_temp_metrics; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + support_temp_metrics = + pp_funcs->temp_metrics_is_supported(adev->powerplay.pp_handle, type); + mutex_unlock(&adev->pm.mutex); + } + + return support_temp_metrics; +} + +/** * amdgpu_dpm_get_xcp_metrics - Retrieve metrics for a specific compute * partition * @adev: Pointer to the device. diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c index 42efe838fa85..2d2d2d5e6763 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c @@ -66,6 +66,13 @@ u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) (amdgpu_crtc->v_border * 2)); vblank_time_us = vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock; + + /* we have issues with mclk switching with + * refresh rates over 120 hz on the non-DC code. + */ + if (drm_mode_vrefresh(&amdgpu_crtc->hw_mode) > 120) + vblank_time_us = 0; + break; } } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 4b64851fdb42..96590c1da553 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -110,9 +110,10 @@ static int amdgpu_pm_dev_state_check(struct amdgpu_device *adev, bool runpm) bool runpm_check = runpm ? adev->in_runpm : false; if (amdgpu_in_reset(adev)) - return -EPERM; + return -EBUSY; + if (adev->in_suspend && !runpm_check) - return -EPERM; + return -EBUSY; return 0; } @@ -2073,6 +2074,134 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd return 0; } +/** + * DOC: board + * + * Certain SOCs can support various board attributes reporting. This is useful + * for user application to monitor various board reated attributes. + * + * The amdgpu driver provides a sysfs API for reporting board attributes. Presently, + * only two types of attributes are reported, baseboard temperature and + * gpu board temperature. Both of them are reported as binary files. + * + * * .. code-block:: console + * + * hexdump /sys/bus/pci/devices/.../board/baseboard_temp + * + * hexdump /sys/bus/pci/devices/.../board/gpuboard_temp + * + */ + +/** + * DOC: baseboard_temp + * + * The amdgpu driver provides a sysfs API for retrieving current baseboard + * temperature metrics data. The file baseboard_temp is used for this. + * Reading the file will dump all the current baseboard temperature metrics data. + */ +static ssize_t amdgpu_get_baseboard_temp_metrics(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size; + int ret; + + ret = amdgpu_pm_get_access_if_active(adev); + if (ret) + return ret; + + size = amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_BASEBOARD, NULL); + if (size <= 0) + goto out; + if (size >= PAGE_SIZE) { + ret = -ENOSPC; + goto out; + } + + amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_BASEBOARD, buf); + +out: + amdgpu_pm_put_access(adev); + + if (ret) + return ret; + + return size; +} + +/** + * DOC: gpuboard_temp + * + * The amdgpu driver provides a sysfs API for retrieving current gpuboard + * temperature metrics data. The file gpuboard_temp is used for this. + * Reading the file will dump all the current gpuboard temperature metrics data. + */ +static ssize_t amdgpu_get_gpuboard_temp_metrics(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size; + int ret; + + ret = amdgpu_pm_get_access_if_active(adev); + if (ret) + return ret; + + size = amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_GPUBOARD, NULL); + if (size <= 0) + goto out; + if (size >= PAGE_SIZE) { + ret = -ENOSPC; + goto out; + } + + amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_GPUBOARD, buf); + +out: + amdgpu_pm_put_access(adev); + + if (ret) + return ret; + + return size; +} + +static DEVICE_ATTR(baseboard_temp, 0444, amdgpu_get_baseboard_temp_metrics, NULL); +static DEVICE_ATTR(gpuboard_temp, 0444, amdgpu_get_gpuboard_temp_metrics, NULL); + +static struct attribute *board_attrs[] = { + &dev_attr_baseboard_temp.attr, + &dev_attr_gpuboard_temp.attr, + NULL +}; + +static umode_t amdgpu_board_attr_visible(struct kobject *kobj, struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (attr == &dev_attr_baseboard_temp.attr) { + if (!amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_BASEBOARD)) + return 0; + } + + if (attr == &dev_attr_gpuboard_temp.attr) { + if (!amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) + return 0; + } + + return attr->mode; +} + +const struct attribute_group amdgpu_board_attr_group = { + .name = "board", + .attrs = board_attrs, + .is_visible = amdgpu_board_attr_visible, +}; + /* pm policy attributes */ struct amdgpu_pm_policy_attr { struct device_attribute dev_attr; @@ -3458,14 +3587,16 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; /* not implemented yet for APUs other than GC 10.3.1 (vangogh) and 9.4.3 */ - if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)) && - (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4)))) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) - return 0; + if (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr) { + if (adev->family == AMDGPU_FAMILY_SI || + ((adev->flags & AMD_IS_APU) && gc_ver != IP_VERSION(10, 3, 1) && + (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4))) || + (amdgpu_sriov_vf(adev) && gc_ver == IP_VERSION(11, 0, 3))) + return 0; + } /* not implemented yet for APUs having < GC 9.3.0 (Renoir) */ if (((adev->family == AMDGPU_FAMILY_SI) || @@ -4461,6 +4592,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) goto err_out0; } + if (amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) { + ret = devm_device_add_group(adev->dev, + &amdgpu_board_attr_group); + if (ret) + goto err_out0; + } + adev->pm.sysfs_initialized = true; return 0; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 768317ee1486..9748744133d9 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -526,6 +526,8 @@ int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev, int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table); ssize_t amdgpu_dpm_get_xcp_metrics(struct amdgpu_device *adev, int xcp_id, void *table); +ssize_t amdgpu_dpm_get_temp_metrics(struct amdgpu_device *adev, + enum smu_temp_metric_type type, void *table); /** * @get_pm_metrics: Get one snapshot of power management metrics from PMFW. The @@ -613,5 +615,8 @@ ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev); int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask); +bool amdgpu_dpm_reset_vcn_is_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_temp_metrics_supported(struct amdgpu_device *adev, + enum smu_temp_metric_type type); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index ea3ace882a10..52dbf6d0469d 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -771,8 +771,7 @@ static struct amdgpu_ps *amdgpu_dpm_pick_power_state(struct amdgpu_device *adev, int i; struct amdgpu_ps *ps; u32 ui_class; - bool single_display = (adev->pm.dpm.new_active_crtc_count < 2) ? - true : false; + bool single_display = adev->pm.dpm.new_active_crtc_count < 2; /* check if the vblank period is too short to adjust the mclk */ if (single_display && adev->powerplay.pp_funcs->vblank_too_short) { diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 52e732be59e3..6595a611ce6e 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3085,7 +3085,13 @@ static bool si_dpm_vblank_too_short(void *handle) /* we never hit the non-gddr5 limit so disable it */ u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; - if (vblank_time < switch_limit) + /* Consider zero vblank time too short and disable MCLK switching. + * Note that the vblank time is set to maximum when no displays are attached, + * so we'll still enable MCLK switching in that case. + */ + if (vblank_time == 0) + return true; + else if (vblank_time < switch_limit) return true; else return false; @@ -3443,12 +3449,14 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, { struct si_ps *ps = si_get_ps(rps); struct amdgpu_clock_and_voltage_limits *max_limits; + struct amdgpu_connector *conn; bool disable_mclk_switching = false; bool disable_sclk_switching = false; u32 mclk, sclk; u16 vddc, vddci, min_vce_voltage = 0; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; u32 max_sclk = 0, max_mclk = 0; + u32 high_pixelclock_count = 0; int i; if (adev->asic_type == CHIP_HAINAN) { @@ -3476,6 +3484,35 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, } } + /* We define "high pixelclock" for SI as higher than necessary for 4K 30Hz. + * For example, 4K 60Hz and 1080p 144Hz fall into this category. + * Find number of such displays connected. + */ + for (i = 0; i < adev->mode_info.num_crtc; i++) { + if (!(adev->pm.dpm.new_active_crtcs & (1 << i)) || + !adev->mode_info.crtcs[i]->enabled) + continue; + + conn = to_amdgpu_connector(adev->mode_info.crtcs[i]->connector); + + if (conn->pixelclock_for_modeset > 297000) + high_pixelclock_count++; + } + + /* These are some ad-hoc fixes to some issues observed with SI GPUs. + * They are necessary because we don't have something like dce_calcs + * for these GPUs to calculate bandwidth requirements. + */ + if (high_pixelclock_count) { + /* On Oland, we observe some flickering when two 4K 60Hz + * displays are connected, possibly because voltage is too low. + * Raise the voltage by requiring a higher SCLK. + * (Voltage cannot be adjusted independently without also SCLK.) + */ + if (high_pixelclock_count > 1 && adev->asic_type == CHIP_OLAND) + disable_sclk_switching = true; + } + if (rps->vce_active) { rps->evclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].evclk; rps->ecclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].ecclk; @@ -5637,14 +5674,10 @@ static int si_populate_smc_t(struct amdgpu_device *adev, static int si_disable_ulv(struct amdgpu_device *adev) { - struct si_power_info *si_pi = si_get_pi(adev); - struct si_ulv_param *ulv = &si_pi->ulv; - - if (ulv->supported) - return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_DisableULV) == PPSMC_Result_OK) ? - 0 : -EINVAL; + PPSMC_Result r; - return 0; + r = amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_DisableULV); + return (r == PPSMC_Result_OK) ? 0 : -EINVAL; } static bool si_is_state_ulv_compatible(struct amdgpu_device *adev, @@ -5817,9 +5850,9 @@ static int si_upload_smc_data(struct amdgpu_device *adev) { struct amdgpu_crtc *amdgpu_crtc = NULL; int i; - - if (adev->pm.dpm.new_active_crtc_count == 0) - return 0; + u32 crtc_index = 0; + u32 mclk_change_block_cp_min = 0; + u32 mclk_change_block_cp_max = 0; for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->pm.dpm.new_active_crtcs & (1 << i)) { @@ -5828,26 +5861,31 @@ static int si_upload_smc_data(struct amdgpu_device *adev) } } - if (amdgpu_crtc == NULL) - return 0; + /* When a display is plugged in, program these so that the SMC + * performs MCLK switching when it doesn't cause flickering. + * When no display is plugged in, there is no need to restrict + * MCLK switching, so program them to zero. + */ + if (adev->pm.dpm.new_active_crtc_count && amdgpu_crtc) { + crtc_index = amdgpu_crtc->crtc_id; - if (amdgpu_crtc->line_time <= 0) - return 0; + if (amdgpu_crtc->line_time) { + mclk_change_block_cp_min = 200 / amdgpu_crtc->line_time; + mclk_change_block_cp_max = 100 / amdgpu_crtc->line_time; + } + } - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_crtc_index, - amdgpu_crtc->crtc_id) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_crtc_index, + crtc_index); - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_mclk_change_block_cp_min, - amdgpu_crtc->wm_high / amdgpu_crtc->line_time) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_mclk_change_block_cp_min, + mclk_change_block_cp_min); - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_mclk_change_block_cp_max, - amdgpu_crtc->wm_low / amdgpu_crtc->line_time) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_mclk_change_block_cp_max, + mclk_change_block_cp_max); return 0; } @@ -7954,6 +7992,7 @@ static void si_dpm_print_power_state(void *handle, amdgpu_dpm_dbg_print_class_info(adev, rps->class, rps->class2); amdgpu_dpm_dbg_print_cap_info(adev, rps->caps); drm_dbg(adev_to_drm(adev), "\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + drm_dbg(adev_to_drm(adev), "\tvce evclk: %d ecclk: %d\n", rps->evclk, rps->ecclk); for (i = 0; i < ps->performance_level_count; i++) { pl = &ps->performance_levels[i]; drm_dbg(adev_to_drm(adev), "\t\tpower level %d sclk: %u mclk: %u vddc: %u vddci: %u pcie gen: %u\n", diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c index 4e65ab9e931c..281a5e377aee 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c @@ -172,20 +172,42 @@ PPSMC_Result amdgpu_si_send_msg_to_smc(struct amdgpu_device *adev, { u32 tmp; int i; + int usec_timeout; + + /* SMC seems to process some messages exceptionally slowly. */ + switch (msg) { + case PPSMC_MSG_NoForcedLevel: + case PPSMC_MSG_SetEnabledLevels: + case PPSMC_MSG_SetForcedLevels: + case PPSMC_MSG_DisableULV: + case PPSMC_MSG_SwitchToSwState: + usec_timeout = 1000000; /* 1 sec */ + break; + default: + usec_timeout = 200000; /* 200 ms */ + break; + } if (!amdgpu_si_is_smc_running(adev)) return PPSMC_Result_Failed; WREG32(mmSMC_MESSAGE_0, msg); - for (i = 0; i < adev->usec_timeout; i++) { + for (i = 0; i < usec_timeout; i++) { tmp = RREG32(mmSMC_RESP_0); if (tmp != 0) break; udelay(1); } - return (PPSMC_Result)RREG32(mmSMC_RESP_0); + tmp = RREG32(mmSMC_RESP_0); + if (tmp == 0) { + drm_warn(adev_to_drm(adev), + "%s timeout on message: %x (SMC_SCRATCH0: %x)\n", + __func__, msg, RREG32(mmSMC_SCRATCH0)); + } + + return (PPSMC_Result)tmp; } PPSMC_Result amdgpu_si_wait_for_smc_inactive(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index 8d40ed0f0e83..ce166a7f8e42 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -563,8 +563,8 @@ bool atomctrl_is_voltage_controlled_by_gpio_v3( PP_ASSERT_WITH_CODE((NULL != voltage_info), "Could not find Voltage Table in BIOS.", return false;); - ret = (NULL != atomctrl_lookup_voltage_type_v3 - (voltage_info, voltage_type, voltage_mode)) ? true : false; + ret = atomctrl_lookup_voltage_type_v3 + (voltage_info, voltage_type, voltage_mode) != NULL; return ret; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 9a821563bc8e..14ccd743ca1d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -1032,7 +1032,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, data->clock_vol_info.vdd_dep_on_fclk; uint32_t i, now, size = 0; uint32_t min_freq, max_freq = 0; - uint32_t ret = 0; + int ret = 0; switch (type) { case PP_SCLK: diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c index 5e43ad2b2956..d2dbd90bb427 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c @@ -2540,9 +2540,8 @@ static int fiji_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool fiji_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int fiji_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c index 17d2f5bff4a7..1f50f1e74c48 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c @@ -2655,9 +2655,8 @@ static int iceland_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool iceland_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } const struct pp_smumgr_func iceland_smu_funcs = { diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c index ff6b563ecbf5..bf6d09572cfc 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c @@ -2578,9 +2578,8 @@ static int polaris10_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool polaris10_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int polaris10_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c index baf51cd82a35..0d4cbe4113a0 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c @@ -401,7 +401,7 @@ failed: int smu7_check_fw_load_finish(struct pp_hwmgr *hwmgr, uint32_t fw_type) { struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); - uint32_t ret; + int ret; ret = phm_wait_on_indirect_register(hwmgr, mmSMC_IND_INDEX_11, smu_data->soft_regs_start + smum_get_offsetof(hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c index 6fe6e6abb5d8..2e21f9d066cb 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c @@ -3139,9 +3139,8 @@ static int tonga_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool tonga_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int tonga_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index b47cb4a5f488..c5965924e7c6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -766,6 +766,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) case IP_VERSION(13, 0, 14): case IP_VERSION(13, 0, 12): smu_v13_0_6_set_ppt_funcs(smu); + smu_v13_0_6_set_temp_funcs(smu); /* Enable pp_od_clk_voltage node */ smu->od_enabled = true; break; @@ -3831,6 +3832,51 @@ int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, return ret; } +static ssize_t smu_sys_get_temp_metrics(void *handle, enum smu_temp_metric_type type, void *table) +{ + struct smu_context *smu = handle; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + enum smu_table_id table_id; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (!smu->smu_temp.temp_funcs || !smu->smu_temp.temp_funcs->get_temp_metrics) + return -EOPNOTSUPP; + + table_id = smu_metrics_get_temp_table_id(type); + + if (table_id == SMU_TABLE_COUNT) + return -EINVAL; + + /* If the request is to get size alone, return the cached table size */ + if (!table && tables[table_id].cache.size) + return tables[table_id].cache.size; + + if (smu_table_cache_is_valid(&tables[table_id])) { + memcpy(table, tables[table_id].cache.buffer, + tables[table_id].cache.size); + return tables[table_id].cache.size; + } + + return smu->smu_temp.temp_funcs->get_temp_metrics(smu, type, table); +} + +static bool smu_temp_metrics_is_supported(void *handle, enum smu_temp_metric_type type) +{ + struct smu_context *smu = handle; + bool ret = false; + + if (!smu->pm_enabled) + return false; + + if (smu->smu_temp.temp_funcs && smu->smu_temp.temp_funcs->temp_metrics_is_supported) + ret = smu->smu_temp.temp_funcs->temp_metrics_is_supported(smu, type); + + return ret; +} + static ssize_t smu_sys_get_xcp_metrics(void *handle, int xcp_id, void *table) { struct smu_context *smu = handle; @@ -3903,6 +3949,8 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .get_dpm_clock_table = smu_get_dpm_clock_table, .get_smu_prv_buf_details = smu_get_prv_buffer_details, .get_xcp_metrics = smu_sys_get_xcp_metrics, + .get_temp_metrics = smu_sys_get_temp_metrics, + .temp_metrics_is_supported = smu_temp_metrics_is_supported, }; int smu_wait_for_event(struct smu_context *smu, enum smu_event_type event, @@ -4076,6 +4124,16 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +bool smu_reset_vcn_is_supported(struct smu_context *smu) +{ + bool ret = false; + + if (smu->ppt_funcs && smu->ppt_funcs->reset_vcn_is_supported) + ret = smu->ppt_funcs->reset_vcn_is_supported(smu); + + return ret; +} + int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask) { if (smu->ppt_funcs && smu->ppt_funcs->dpm_reset_vcn) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index b52e194397e2..5976eda80035 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -249,6 +249,14 @@ struct smu_user_dpm_profile { tables[table_id].domain = d; \ } while (0) +struct smu_table_cache { + void *buffer; + size_t size; + /* interval in ms*/ + uint32_t interval; + unsigned long last_cache_time; +}; + struct smu_table { uint64_t size; uint32_t align; @@ -257,6 +265,7 @@ struct smu_table { void *cpu_addr; struct amdgpu_bo *bo; uint32_t version; + struct smu_table_cache cache; }; enum smu_perf_level_designation { @@ -322,6 +331,9 @@ enum smu_table_id { SMU_TABLE_ECCINFO, SMU_TABLE_COMBO_PPTABLE, SMU_TABLE_WIFIBAND, + SMU_TABLE_GPUBOARD_TEMP_METRICS, + SMU_TABLE_BASEBOARD_TEMP_METRICS, + SMU_TABLE_PMFW_SYSTEM_METRICS, SMU_TABLE_COUNT, }; @@ -396,6 +408,10 @@ struct smu_dpm_context { struct smu_dpm_policy_ctxt *dpm_policies; }; +struct smu_temp_context { + const struct smu_temp_funcs *temp_funcs; +}; + struct smu_power_gate { bool uvd_gated; bool vce_gated; @@ -529,6 +545,7 @@ struct smu_context { struct smu_table_context smu_table; struct smu_dpm_context smu_dpm; struct smu_power_context smu_power; + struct smu_temp_context smu_temp; struct smu_feature smu_feature; struct amd_pp_display_configuration *display_config; struct smu_baco_context smu_baco; @@ -624,6 +641,28 @@ struct smu_context { struct i2c_adapter; /** + * struct smu_temp_funcs - Callbacks used to get temperature data. + */ +struct smu_temp_funcs { + /** + * @get_temp_metrics: Calibrate voltage/frequency curve to fit the system's + * power delivery and voltage margins. Required for adaptive + * @type Temperature metrics type(baseboard/gpuboard) + * Return: Size of &table + */ + ssize_t (*get_temp_metrics)(struct smu_context *smu, + enum smu_temp_metric_type type, void *table); + + /** + * @temp_metrics_is_support: Get if specific temperature metrics is supported + * @type Temperature metrics type(baseboard/gpuboard) + * Return: true if supported else false + */ + bool (*temp_metrics_is_supported)(struct smu_context *smu, enum smu_temp_metric_type type); + +}; + +/** * struct pptable_funcs - Callbacks used to interact with the SMU. */ struct pptable_funcs { @@ -1397,6 +1436,10 @@ struct pptable_funcs { * @reset_vcn: message SMU to soft reset vcn instance. */ int (*dpm_reset_vcn)(struct smu_context *smu, uint32_t inst_mask); + /** + * @reset_vcn_is_supported: Check if support resets vcn. + */ + bool (*reset_vcn_is_supported)(struct smu_context *smu); /** * @get_ecc_table: message SMU to get ECC INFO table. @@ -1622,6 +1665,71 @@ typedef struct { struct smu_dpm_policy *smu_get_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type); +static inline enum smu_table_id +smu_metrics_get_temp_table_id(enum smu_temp_metric_type type) +{ + switch (type) { + case SMU_TEMP_METRIC_BASEBOARD: + return SMU_TABLE_BASEBOARD_TEMP_METRICS; + case SMU_TEMP_METRIC_GPUBOARD: + return SMU_TABLE_GPUBOARD_TEMP_METRICS; + default: + return SMU_TABLE_COUNT; + } + + return SMU_TABLE_COUNT; +} + +static inline void smu_table_cache_update_time(struct smu_table *table, + unsigned long time) +{ + table->cache.last_cache_time = time; +} + +static inline bool smu_table_cache_is_valid(struct smu_table *table) +{ + if (!table->cache.buffer || !table->cache.last_cache_time || + !table->cache.interval || !table->cache.size || + time_after(jiffies, + table->cache.last_cache_time + + msecs_to_jiffies(table->cache.interval))) + return false; + + return true; +} + +static inline int smu_table_cache_init(struct smu_context *smu, + enum smu_table_id table_id, size_t size, + uint32_t cache_interval) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + + tables[table_id].cache.buffer = kzalloc(size, GFP_KERNEL); + if (!tables[table_id].cache.buffer) + return -ENOMEM; + + tables[table_id].cache.last_cache_time = 0; + tables[table_id].cache.interval = cache_interval; + tables[table_id].cache.size = size; + + return 0; +} + +static inline void smu_table_cache_fini(struct smu_context *smu, + enum smu_table_id table_id) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + + if (tables[table_id].cache.buffer) { + kfree(tables[table_id].cache.buffer); + tables[table_id].cache.buffer = NULL; + tables[table_id].cache.last_cache_time = 0; + tables[table_id].cache.interval = 0; + } +} + #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4) int smu_get_power_limit(void *handle, uint32_t *limit, @@ -1673,6 +1781,7 @@ int smu_send_rma_reason(struct smu_context *smu); int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); bool smu_reset_sdma_is_supported(struct smu_context *smu); int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask); +bool smu_reset_vcn_is_supported(struct smu_context *smu); int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, int level); ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h index 0a2ca544f4e3..1c407a8e96ee 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h @@ -135,7 +135,63 @@ typedef enum { GFX_DVM_MARGIN_COUNT } GFX_DVM_MARGIN_e; -#define SMU_METRICS_TABLE_VERSION 0x13 +typedef enum{ + SYSTEM_TEMP_UBB_FPGA, + SYSTEM_TEMP_UBB_FRONT, + SYSTEM_TEMP_UBB_BACK, + SYSTEM_TEMP_UBB_OAM7, + SYSTEM_TEMP_UBB_IBC, + SYSTEM_TEMP_UBB_UFPGA, + SYSTEM_TEMP_UBB_OAM1, + SYSTEM_TEMP_OAM_0_1_HSC, + SYSTEM_TEMP_OAM_2_3_HSC, + SYSTEM_TEMP_OAM_4_5_HSC, + SYSTEM_TEMP_OAM_6_7_HSC, + SYSTEM_TEMP_UBB_FPGA_0V72_VR, + SYSTEM_TEMP_UBB_FPGA_3V3_VR, + SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR, + SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR, + SYSTEM_TEMP_RETIMER_0_1_0V9_VR, + SYSTEM_TEMP_RETIMER_4_5_0V9_VR, + SYSTEM_TEMP_RETIMER_2_3_0V9_VR, + SYSTEM_TEMP_RETIMER_6_7_0V9_VR, + SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR, + SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR, + SYSTEM_TEMP_IBC_HSC, + SYSTEM_TEMP_IBC, + SYSTEM_TEMP_MAX_ENTRIES = 32 +} SYSTEM_TEMP_e; + +typedef enum{ + NODE_TEMP_RETIMER, + NODE_TEMP_IBC_TEMP, + NODE_TEMP_IBC_2_TEMP, + NODE_TEMP_VDD18_VR_TEMP, + NODE_TEMP_04_HBM_B_VR_TEMP, + NODE_TEMP_04_HBM_D_VR_TEMP, + NODE_TEMP_MAX_TEMP_ENTRIES = 12 +} NODE_TEMP_e; + +typedef enum { + SVI_VDDCR_VDD0_TEMP, + SVI_VDDCR_VDD1_TEMP, + SVI_VDDCR_VDD2_TEMP, + SVI_VDDCR_VDD3_TEMP, + SVI_VDDCR_SOC_A_TEMP, + SVI_VDDCR_SOC_C_TEMP, + SVI_VDDCR_SOCIO_A_TEMP, + SVI_VDDCR_SOCIO_C_TEMP, + SVI_VDD_085_HBM_TEMP, + SVI_VDDCR_11_HBM_B_TEMP, + SVI_VDDCR_11_HBM_D_TEMP, + SVI_VDD_USR_TEMP, + SVI_VDDIO_11_E32_TEMP, + SVI_MAX_TEMP_ENTRIES, // 13 +} SVI_TEMP_e; + +#define SMU_METRICS_TABLE_VERSION 0x14 + +#define SMU_SYSTEM_METRICS_TABLE_VERSION 0x0 typedef struct __attribute__((packed, aligned(4))) { uint64_t AccumulationCounter; @@ -231,11 +287,27 @@ typedef struct __attribute__((packed, aligned(4))) { uint64_t GfxclkBelowHostLimitThmAcc[8]; uint64_t GfxclkBelowHostLimitTotalAcc[8]; uint64_t GfxclkLowUtilizationAcc[8]; + + uint32_t AidTemperature[4]; + uint32_t XcdTemperature[8]; + uint32_t HbmTemperature[8]; } MetricsTable_t; #define SMU_VF_METRICS_TABLE_MASK (1 << 31) #define SMU_VF_METRICS_TABLE_VERSION (0x6 | SMU_VF_METRICS_TABLE_MASK) +#pragma pack(push, 4) +typedef struct { + uint64_t AccumulationCounter; // Last update timestamp + uint16_t LabelVersion; // Defaults to 0. + uint16_t NodeIdentifier; // Unique identifier to each node on system. + int16_t SystemTemperatures[SYSTEM_TEMP_MAX_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF + int16_t NodeTemperatures[NODE_TEMP_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF + int16_t VrTemperatures[SVI_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius + int16_t spare[3]; +} SystemMetricsTable_t; +#pragma pack(pop) + typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; uint32_t InstGfxclk_TargFreq; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h index e1f490b6ce64..aff2776a8b6f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h @@ -116,7 +116,11 @@ #define PPSMC_MSG_DumpErrorRecord 0x57 #define PPSMC_MSG_EraseRasTable 0x58 #define PPSMC_MSG_GetStaticMetricsTable 0x59 -#define PPSMC_Message_Count 0x5A +#define PPSMC_MSG_ResetVfArbitersByIndex 0x5A +#define PPSMC_MSG_GetBadPageSeverity 0x5B +#define PPSMC_MSG_GetSystemMetricsTable 0x5C +#define PPSMC_MSG_GetSystemMetricsVersion 0x5D +#define PPSMC_Message_Count 0x5E //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 41f268313613..63a088ef7169 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -94,9 +94,9 @@ #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SetThrottlingPolicy 0x44 #define PPSMC_MSG_ResetSDMA 0x4D -#define PPSMC_MSG_ResetVCN 0x4E #define PPSMC_MSG_GetStaticMetricsTable 0x59 -#define PPSMC_Message_Count 0x5A +#define PPSMC_MSG_ResetVCN 0x5B +#define PPSMC_Message_Count 0x5C //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index d7a9e41820fa..2256c77da636 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -278,7 +278,8 @@ __SMU_DUMMY_MAP(MALLPowerState), \ __SMU_DUMMY_MAP(ResetSDMA), \ __SMU_DUMMY_MAP(ResetVCN), \ - __SMU_DUMMY_MAP(GetStaticMetricsTable), + __SMU_DUMMY_MAP(GetStaticMetricsTable), \ + __SMU_DUMMY_MAP(GetSystemMetricsTable), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type @@ -469,6 +470,7 @@ enum smu_feature_mask { /* Message category flags */ #define SMU_MSG_VF_FLAG (1U << 0) #define SMU_MSG_RAS_PRI (1U << 1) +#define SMU_MSG_NO_PRECHECK (1U << 2) /* Firmware capability flags */ #define SMU_FW_CAP_RAS_PRI (1U << 0) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 9ad46f545d15..599eddb5a67d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1897,7 +1897,7 @@ static ssize_t arcturus_get_gpu_metrics(struct smu_context *smu, ret = smu_cmn_get_metrics_table(smu, &metrics, - true); + false); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index e97b0cf19197..3baf20f4c373 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -470,7 +470,7 @@ static int renoir_od_edit_dpm_table(struct smu_context *smu, static int renoir_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) { uint32_t min = 0, max = 0; - uint32_t ret = 0; + int ret = 0; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetMinGfxclkFrequency, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index c63d2e28954d..b067147b7c41 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1781,7 +1781,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, ret = smu_cmn_get_metrics_table(smu, &metrics, - true); + false); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 02a455a31c25..0bec12b348ce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -83,7 +83,6 @@ const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[SMU_FEATURE_COUNT] = SMU_13_0_12_FEA_MAP(SMU_FEATURE_PIT_BIT, FEATURE_PIT), }; -// clang-format off const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -106,7 +105,7 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), - MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI), + MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0), MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), @@ -138,8 +137,55 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), + MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 0), }; +int smu_v13_0_12_tables_init(struct smu_context *smu) +{ + struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics; + struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + struct smu_table_cache *cache; + int ret; + + ret = smu_table_cache_init(smu, SMU_TABLE_PMFW_SYSTEM_METRICS, + smu_v13_0_12_get_system_metrics_size(), 5); + + if (ret) + return ret; + + ret = smu_table_cache_init(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS, + sizeof(*baseboard_temp_metrics), 50); + if (ret) + return ret; + /* Initialize base board temperature metrics */ + cache = &(tables[SMU_TABLE_BASEBOARD_TEMP_METRICS].cache); + baseboard_temp_metrics = + (struct amdgpu_baseboard_temp_metrics_v1_0 *) cache->buffer; + smu_cmn_init_baseboard_temp_metrics(baseboard_temp_metrics, 1, 0); + /* Initialize GPU board temperature metrics */ + ret = smu_table_cache_init(smu, SMU_TABLE_GPUBOARD_TEMP_METRICS, + sizeof(*gpuboard_temp_metrics), 50); + if (ret) { + smu_table_cache_fini(smu, SMU_TABLE_PMFW_SYSTEM_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS); + return ret; + } + cache = &(tables[SMU_TABLE_GPUBOARD_TEMP_METRICS].cache); + gpuboard_temp_metrics = (struct amdgpu_gpuboard_temp_metrics_v1_0 *)cache->buffer; + smu_cmn_init_gpuboard_temp_metrics(gpuboard_temp_metrics, 1, 0); + + return 0; +} + +void smu_v13_0_12_tables_fini(struct smu_context *smu) +{ + smu_table_cache_fini(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_GPUBOARD_TEMP_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_PMFW_SYSTEM_METRICS); +} + static int smu_v13_0_12_get_enabled_mask(struct smu_context *smu, uint64_t *feature_mask) { @@ -187,6 +233,11 @@ int smu_v13_0_12_get_max_metrics_size(void) return max(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t)); } +size_t smu_v13_0_12_get_system_metrics_size(void) +{ + return sizeof(SystemMetricsTable_t); +} + static void smu_v13_0_12_init_xgmi_data(struct smu_context *smu, StaticMetricsTable_t *static_metrics) { @@ -220,7 +271,7 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; uint32_t table_version; - int ret, i; + int ret, i, n; if (!pptable->Init) { ret = smu_v13_0_6_get_static_metrics_table(smu); @@ -259,6 +310,22 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) /* use AID0 serial number by default */ pptable->PublicSerialNumber_AID = static_metrics->PublicSerialNumber_AID[0]; + + amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC, + 0, pptable->PublicSerialNumber_AID); + n = ARRAY_SIZE(static_metrics->PublicSerialNumber_AID); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i, + static_metrics->PublicSerialNumber_AID[i]); + } + n = ARRAY_SIZE(static_metrics->PublicSerialNumber_XCD); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i, + static_metrics->PublicSerialNumber_XCD[i]); + } + ret = smu_v13_0_12_fru_get_product_info(smu, static_metrics); if (ret) return ret; @@ -359,6 +426,248 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, return 0; } +static int smu_v13_0_12_get_system_metrics_table(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *table = &smu_table->driver_table; + struct smu_table *tables = smu_table->tables; + struct smu_table *sys_table; + int ret; + + sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; + if (smu_table_cache_is_valid(sys_table)) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetSystemMetricsTable, NULL); + if (ret) { + dev_info(smu->adev->dev, + "Failed to export system metrics table!\n"); + return ret; + } + + amdgpu_asic_invalidate_hdp(smu->adev, NULL); + smu_table_cache_update_time(sys_table, jiffies); + memcpy(sys_table->cache.buffer, table->cpu_addr, + smu_v13_0_12_get_system_metrics_size()); + + return 0; +} + +static enum amdgpu_node_temp smu_v13_0_12_get_node_sensor_type(NODE_TEMP_e type) +{ + switch (type) { + case NODE_TEMP_RETIMER: + return AMDGPU_RETIMER_X_TEMP; + case NODE_TEMP_IBC_TEMP: + return AMDGPU_OAM_X_IBC_TEMP; + case NODE_TEMP_IBC_2_TEMP: + return AMDGPU_OAM_X_IBC_2_TEMP; + case NODE_TEMP_VDD18_VR_TEMP: + return AMDGPU_OAM_X_VDD18_VR_TEMP; + case NODE_TEMP_04_HBM_B_VR_TEMP: + return AMDGPU_OAM_X_04_HBM_B_VR_TEMP; + case NODE_TEMP_04_HBM_D_VR_TEMP: + return AMDGPU_OAM_X_04_HBM_D_VR_TEMP; + default: + return -EINVAL; + } +} + +static enum amdgpu_vr_temp smu_v13_0_12_get_vr_sensor_type(SVI_TEMP_e type) +{ + switch (type) { + case SVI_VDDCR_VDD0_TEMP: + return AMDGPU_VDDCR_VDD0_TEMP; + case SVI_VDDCR_VDD1_TEMP: + return AMDGPU_VDDCR_VDD1_TEMP; + case SVI_VDDCR_VDD2_TEMP: + return AMDGPU_VDDCR_VDD2_TEMP; + case SVI_VDDCR_VDD3_TEMP: + return AMDGPU_VDDCR_VDD3_TEMP; + case SVI_VDDCR_SOC_A_TEMP: + return AMDGPU_VDDCR_SOC_A_TEMP; + case SVI_VDDCR_SOC_C_TEMP: + return AMDGPU_VDDCR_SOC_C_TEMP; + case SVI_VDDCR_SOCIO_A_TEMP: + return AMDGPU_VDDCR_SOCIO_A_TEMP; + case SVI_VDDCR_SOCIO_C_TEMP: + return AMDGPU_VDDCR_SOCIO_C_TEMP; + case SVI_VDD_085_HBM_TEMP: + return AMDGPU_VDD_085_HBM_TEMP; + case SVI_VDDCR_11_HBM_B_TEMP: + return AMDGPU_VDDCR_11_HBM_B_TEMP; + case SVI_VDDCR_11_HBM_D_TEMP: + return AMDGPU_VDDCR_11_HBM_D_TEMP; + case SVI_VDD_USR_TEMP: + return AMDGPU_VDD_USR_TEMP; + case SVI_VDDIO_11_E32_TEMP: + return AMDGPU_VDDIO_11_E32_TEMP; + default: + return -EINVAL; + } +} + +static enum amdgpu_system_temp smu_v13_0_12_get_system_sensor_type(SYSTEM_TEMP_e type) +{ + switch (type) { + case SYSTEM_TEMP_UBB_FPGA: + return AMDGPU_UBB_FPGA_TEMP; + case SYSTEM_TEMP_UBB_FRONT: + return AMDGPU_UBB_FRONT_TEMP; + case SYSTEM_TEMP_UBB_BACK: + return AMDGPU_UBB_BACK_TEMP; + case SYSTEM_TEMP_UBB_OAM7: + return AMDGPU_UBB_OAM7_TEMP; + case SYSTEM_TEMP_UBB_IBC: + return AMDGPU_UBB_IBC_TEMP; + case SYSTEM_TEMP_UBB_UFPGA: + return AMDGPU_UBB_UFPGA_TEMP; + case SYSTEM_TEMP_UBB_OAM1: + return AMDGPU_UBB_OAM1_TEMP; + case SYSTEM_TEMP_OAM_0_1_HSC: + return AMDGPU_OAM_0_1_HSC_TEMP; + case SYSTEM_TEMP_OAM_2_3_HSC: + return AMDGPU_OAM_2_3_HSC_TEMP; + case SYSTEM_TEMP_OAM_4_5_HSC: + return AMDGPU_OAM_4_5_HSC_TEMP; + case SYSTEM_TEMP_OAM_6_7_HSC: + return AMDGPU_OAM_6_7_HSC_TEMP; + case SYSTEM_TEMP_UBB_FPGA_0V72_VR: + return AMDGPU_UBB_FPGA_0V72_VR_TEMP; + case SYSTEM_TEMP_UBB_FPGA_3V3_VR: + return AMDGPU_UBB_FPGA_3V3_VR_TEMP; + case SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR: + return AMDGPU_RETIMER_0_1_2_3_1V2_VR_TEMP; + case SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR: + return AMDGPU_RETIMER_4_5_6_7_1V2_VR_TEMP; + case SYSTEM_TEMP_RETIMER_0_1_0V9_VR: + return AMDGPU_RETIMER_0_1_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_4_5_0V9_VR: + return AMDGPU_RETIMER_4_5_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_2_3_0V9_VR: + return AMDGPU_RETIMER_2_3_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_6_7_0V9_VR: + return AMDGPU_RETIMER_6_7_0V9_VR_TEMP; + case SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR: + return AMDGPU_OAM_0_1_2_3_3V3_VR_TEMP; + case SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR: + return AMDGPU_OAM_4_5_6_7_3V3_VR_TEMP; + case SYSTEM_TEMP_IBC_HSC: + return AMDGPU_IBC_HSC_TEMP; + case SYSTEM_TEMP_IBC: + return AMDGPU_IBC_TEMP; + default: + return -EINVAL; + } +} + +static bool smu_v13_0_12_is_temp_metrics_supported(struct smu_context *smu, + enum smu_temp_metric_type type) +{ + switch (type) { + case SMU_TEMP_METRIC_BASEBOARD: + if (smu->adev->gmc.xgmi.physical_node_id == 0 && + smu->adev->gmc.xgmi.num_physical_nodes > 1 && + smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS))) + return true; + break; + case SMU_TEMP_METRIC_GPUBOARD: + return smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS)); + default: + break; + } + + return false; +} + +static ssize_t smu_v13_0_12_get_temp_metrics(struct smu_context *smu, + enum smu_temp_metric_type type, void *table) +{ + struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics; + struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + SystemMetricsTable_t *metrics; + struct smu_table *data_table; + struct smu_table *sys_table; + int ret, sensor_type; + u32 idx, sensors; + ssize_t size; + + if (type == SMU_TEMP_METRIC_BASEBOARD) { + /* Initialize base board temperature metrics */ + data_table = + &smu->smu_table.tables[SMU_TABLE_BASEBOARD_TEMP_METRICS]; + baseboard_temp_metrics = + (struct amdgpu_baseboard_temp_metrics_v1_0 *) + data_table->cache.buffer; + size = sizeof(*baseboard_temp_metrics); + } else { + data_table = + &smu->smu_table.tables[SMU_TABLE_GPUBOARD_TEMP_METRICS]; + gpuboard_temp_metrics = + (struct amdgpu_gpuboard_temp_metrics_v1_0 *) + data_table->cache.buffer; + size = sizeof(*baseboard_temp_metrics); + } + + ret = smu_v13_0_12_get_system_metrics_table(smu); + if (ret) + return ret; + + sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; + metrics = (SystemMetricsTable_t *)sys_table->cache.buffer; + smu_table_cache_update_time(data_table, jiffies); + + if (type == SMU_TEMP_METRIC_GPUBOARD) { + gpuboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter; + gpuboard_temp_metrics->label_version = metrics->LabelVersion; + gpuboard_temp_metrics->node_id = metrics->NodeIdentifier; + + idx = 0; + for (sensors = 0; sensors < NODE_TEMP_MAX_TEMP_ENTRIES; sensors++) { + if (metrics->NodeTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_node_sensor_type(sensors); + gpuboard_temp_metrics->node_temp[idx] = + ((int)metrics->NodeTemperatures[sensors]) & 0xFFFFFF; + gpuboard_temp_metrics->node_temp[idx] |= (sensor_type << 24); + idx++; + } + } + + idx = 0; + + for (sensors = 0; sensors < SVI_MAX_TEMP_ENTRIES; sensors++) { + if (metrics->VrTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_vr_sensor_type(sensors); + gpuboard_temp_metrics->vr_temp[idx] = + ((int)metrics->VrTemperatures[sensors]) & 0xFFFFFF; + gpuboard_temp_metrics->vr_temp[idx] |= (sensor_type << 24); + idx++; + } + } + } else if (type == SMU_TEMP_METRIC_BASEBOARD) { + baseboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter; + baseboard_temp_metrics->label_version = metrics->LabelVersion; + baseboard_temp_metrics->node_id = metrics->NodeIdentifier; + + idx = 0; + for (sensors = 0; sensors < SYSTEM_TEMP_MAX_ENTRIES; sensors++) { + if (metrics->SystemTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_system_sensor_type(sensors); + baseboard_temp_metrics->system_temp[idx] = + ((int)metrics->SystemTemperatures[sensors]) & 0xFFFFFF; + baseboard_temp_metrics->system_temp[idx] |= (sensor_type << 24); + idx++; + } + } + } + + memcpy(table, data_table->cache.buffer, size); + + return size; +} + ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics) { const u8 num_jpeg_rings = NUM_JPEG_RINGS_FW; @@ -572,3 +881,8 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void return sizeof(*gpu_metrics); } + +const struct smu_temp_funcs smu_v13_0_12_temp_funcs = { + .temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported, + .get_temp_metrics = smu_v13_0_12_get_temp_metrics, +}; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 9cc294f4708b..ebee659f8a1c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -145,7 +145,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), - MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI), + MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0), MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), @@ -177,7 +177,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), - MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 0), + MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), }; // clang-format on @@ -312,6 +312,8 @@ static void smu_v13_0_14_init_caps(struct smu_context *smu) smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); if (fw_ver >= 0x5551200) smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); + if (fw_ver >= 0x5551800) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); if (fw_ver >= 0x5551600) { smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); @@ -350,6 +352,13 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } + + if (fw_ver >= 0x04560700) { + if (!amdgpu_sriov_vf(smu->adev)) + smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); + } else { + smu_v13_0_12_tables_fini(smu); + } } static void smu_v13_0_6_init_caps(struct smu_context *smu) @@ -402,19 +411,37 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) if ((pgm == 7 && fw_ver >= 0x7550E00) || (pgm == 0 && fw_ver >= 0x00557E00)) smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); - if ((pgm == 0 && fw_ver >= 0x00557F01) || - (pgm == 7 && fw_ver >= 0x7551000)) { - smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); - smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); + + if (amdgpu_sriov_vf(adev)) { + if ((pgm == 0 && fw_ver >= 0x00558000) || + (pgm == 7 && fw_ver >= 0x7551000)) { + smu_v13_0_6_cap_set(smu, + SMU_CAP(STATIC_METRICS)); + smu_v13_0_6_cap_set(smu, + SMU_CAP(BOARD_VOLTAGE)); + smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); + } + } else { + if ((pgm == 0 && fw_ver >= 0x00557F01) || + (pgm == 7 && fw_ver >= 0x7551000)) { + smu_v13_0_6_cap_set(smu, + SMU_CAP(STATIC_METRICS)); + smu_v13_0_6_cap_set(smu, + SMU_CAP(BOARD_VOLTAGE)); + } + if ((pgm == 0 && fw_ver >= 0x00558000) || + (pgm == 7 && fw_ver >= 0x7551000)) + smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } - if ((pgm == 0 && fw_ver >= 0x00558000) || - (pgm == 7 && fw_ver >= 0x7551000)) - smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } if (((pgm == 7) && (fw_ver >= 0x7550700)) || ((pgm == 0) && (fw_ver >= 0x00557900)) || ((pgm == 4) && (fw_ver >= 0x4557000))) smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); + + if (((pgm == 0) && (fw_ver >= 0x00558200)) || + ((pgm == 4) && (fw_ver >= 0x04557100))) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); } static void smu_v13_0_x_init_caps(struct smu_context *smu) @@ -511,8 +538,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; + void *gpu_metrics_table __free(kfree) = NULL; + void *driver_pptable __free(kfree) = NULL; + void *metrics_table __free(kfree) = NULL; struct amdgpu_device *adev = smu->adev; int gpu_metrcs_size = METRICS_TABLE_SIZE; + int ret; if (!(adev->flags & AMD_IS_APU)) SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE, @@ -528,27 +559,36 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); - if (!smu_table->metrics_table) + SMU_TABLE_INIT(tables, SMU_TABLE_PMFW_SYSTEM_METRICS, + smu_v13_0_12_get_system_metrics_size(), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); + + metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); + if (!metrics_table) return -ENOMEM; smu_table->metrics_time = 0; smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_8); - smu_table->gpu_metrics_table = + gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); - if (!smu_table->gpu_metrics_table) { - kfree(smu_table->metrics_table); + if (!gpu_metrics_table) return -ENOMEM; - } - smu_table->driver_pptable = - kzalloc(sizeof(struct PPTable_t), GFP_KERNEL); - if (!smu_table->driver_pptable) { - kfree(smu_table->metrics_table); - kfree(smu_table->gpu_metrics_table); + driver_pptable = kzalloc(sizeof(struct PPTable_t), GFP_KERNEL); + if (!driver_pptable) return -ENOMEM; + + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 12)) { + ret = smu_v13_0_12_tables_init(smu); + if (ret) + return ret; } + smu_table->gpu_metrics_table = no_free_ptr(gpu_metrics_table); + smu_table->metrics_table = no_free_ptr(metrics_table); + smu_table->driver_pptable = no_free_ptr(driver_pptable); + return 0; } @@ -677,6 +717,13 @@ static int smu_v13_0_6_init_smc_tables(struct smu_context *smu) return ret; } +static int smu_v13_0_6_fini_smc_tables(struct smu_context *smu) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) + smu_v13_0_12_tables_fini(smu); + return smu_v13_0_fini_smc_tables(smu); +} + static int smu_v13_0_6_get_allowed_feature_mask(struct smu_context *smu, uint32_t *feature_mask, uint32_t num) @@ -803,7 +850,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; int version = smu_v13_0_6_get_metrics_version(smu); - int ret, i, retry = 100; + int ret, i, retry = 100, n; uint32_t table_version; uint16_t max_speed; uint8_t max_width; @@ -865,6 +912,23 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0]; + amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC, + 0, pptable->PublicSerialNumber_AID); + n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_AID); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i, + GET_METRIC_FIELD(PublicSerialNumber_AID, + version)[i]); + } + n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_XCD); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i, + GET_METRIC_FIELD(PublicSerialNumber_XCD, + version)[i]); + } + pptable->Init = true; if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { ret = smu_v13_0_6_get_static_metrics_table(smu); @@ -2560,9 +2624,9 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, const u8 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; int version = smu_v13_0_6_get_metrics_version(smu); struct amdgpu_partition_metrics_v1_0 *xcp_metrics; + MetricsTableV0_t *metrics_v0 __free(kfree) = NULL; struct amdgpu_device *adev = smu->adev; int ret, inst, i, j, k, idx; - MetricsTableV0_t *metrics_v0; MetricsTableV1_t *metrics_v1; MetricsTableV2_t *metrics_v2; struct amdgpu_xcp *xcp; @@ -2587,17 +2651,14 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, return -ENOMEM; ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); - if (ret) { - kfree(metrics_v0); + if (ret) return ret; - } if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && - smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { - ret = smu_v13_0_12_get_xcp_metrics(smu, xcp, table, metrics_v0); - goto out; - } + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + return smu_v13_0_12_get_xcp_metrics(smu, xcp, table, + metrics_v0); metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; @@ -2668,8 +2729,6 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, idx++; } } -out: - kfree(metrics_v0); return sizeof(*xcp_metrics); } @@ -2680,31 +2739,26 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table struct gpu_metrics_v1_8 *gpu_metrics = (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int version = smu_v13_0_6_get_metrics_version(smu); + MetricsTableV0_t *metrics_v0 __free(kfree) = NULL; int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; - MetricsTableV0_t *metrics_v0; MetricsTableV1_t *metrics_v1; MetricsTableV2_t *metrics_v2; struct amdgpu_xcp *xcp; u16 link_width_level; - ssize_t num_bytes; u8 num_jpeg_rings; u32 inst_mask; bool per_inst; metrics_v0 = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); - if (ret) { - kfree(metrics_v0); + if (ret) return ret; - } - if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && - smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { - num_bytes = smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0); - kfree(metrics_v0); - return num_bytes; - } + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + return smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0); metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; @@ -2890,7 +2944,6 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); *table = (void *)gpu_metrics; - kfree(metrics_v0); return sizeof(*gpu_metrics); } @@ -3076,7 +3129,7 @@ static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int var = (adev->pdev->device & 0xF); - if (var == 0x1) + if (var == 0x0 || var == 0x1 || var == 0x3) return true; return false; @@ -3152,6 +3205,11 @@ static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +static bool smu_v13_0_6_reset_vcn_is_supported(struct smu_context *smu) +{ + return smu_v13_0_6_cap_supported(smu, SMU_CAP(VCN_RESET)); +} + static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) { int ret = 0; @@ -3797,7 +3855,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .init_microcode = smu_v13_0_6_init_microcode, .fini_microcode = smu_v13_0_fini_microcode, .init_smc_tables = smu_v13_0_6_init_smc_tables, - .fini_smc_tables = smu_v13_0_fini_smc_tables, + .fini_smc_tables = smu_v13_0_6_fini_smc_tables, .init_power = smu_v13_0_init_power, .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_6_check_fw_status, @@ -3840,6 +3898,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .reset_sdma = smu_v13_0_6_reset_sdma, .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported, .dpm_reset_vcn = smu_v13_0_6_reset_vcn, + .reset_vcn_is_supported = smu_v13_0_6_reset_vcn_is_supported, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) @@ -3857,3 +3916,9 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs); amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs); } + +void smu_v13_0_6_set_temp_funcs(struct smu_context *smu) +{ + smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) + == IP_VERSION(13, 0, 12)) ? &smu_v13_0_12_temp_funcs : NULL; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index 67b30674fd31..aae9a546a67e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -64,14 +64,17 @@ enum smu_v13_0_6_caps { SMU_CAP(RMA_MSG), SMU_CAP(ACA_SYND), SMU_CAP(SDMA_RESET), + SMU_CAP(VCN_RESET), SMU_CAP(STATIC_METRICS), SMU_CAP(HST_LIMIT_METRICS), SMU_CAP(BOARD_VOLTAGE), SMU_CAP(PLDM_VERSION), + SMU_CAP(TEMP_METRICS), SMU_CAP(ALL), }; extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); +extern void smu_v13_0_6_set_temp_funcs(struct smu_context *smu); bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap); int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu); int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, @@ -79,6 +82,7 @@ int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, bool smu_v13_0_12_is_dpm_running(struct smu_context *smu); int smu_v13_0_12_get_max_metrics_size(void); +size_t smu_v13_0_12_get_system_metrics_size(void); int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu); int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value); @@ -86,6 +90,9 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics); +int smu_v13_0_12_tables_init(struct smu_context *smu); +void smu_v13_0_12_tables_fini(struct smu_context *smu); extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[]; extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[]; +extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs; #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 3aea32baea3d..f32474af90b3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1697,9 +1697,11 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *min_power_limit) { struct smu_table_context *table_context = &smu->smu_table; + struct smu_14_0_2_powerplay_table *powerplay_table = + table_context->power_play_table; PPTable_t *pptable = table_context->driver_pptable; CustomSkuTable_t *skutable = &pptable->CustomSkuTable; - uint32_t power_limit; + uint32_t power_limit, od_percent_upper = 0, od_percent_lower = 0; uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v14_0_get_current_power_limit(smu, &power_limit)) @@ -1712,11 +1714,29 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (max_power_limit) - *max_power_limit = msg_limit; + if (powerplay_table) { + if (smu->od_enabled && + smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = pptable->SkuTable.OverDriveLimitsBasicMax.Ppt; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } else if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = 0; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } + } + + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); + + if (max_power_limit) { + *max_power_limit = msg_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } - if (min_power_limit) - *min_power_limit = 0; + if (min_power_limit) { + *min_power_limit = power_limit * (100 + od_percent_lower); + *min_power_limit /= 100; + } return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 59f9abd0f7b8..f532f7c69259 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -256,11 +256,12 @@ static int __smu_cmn_ras_filter_msg(struct smu_context *smu, { struct amdgpu_device *adev = smu->adev; uint32_t flags, resp; - bool fed_status; + bool fed_status, pri; flags = __smu_cmn_get_msg_flags(smu, msg); *poll = true; + pri = !!(flags & SMU_MSG_NO_PRECHECK); /* When there is RAS fatal error, FW won't process non-RAS priority * messages. Don't allow any messages other than RAS priority messages. */ @@ -272,15 +273,18 @@ static int __smu_cmn_ras_filter_msg(struct smu_context *smu, smu_get_message_name(smu, msg)); return -EACCES; } + } + if (pri || fed_status) { /* FW will ignore non-priority messages when a RAS fatal error - * is detected. Hence it is possible that a previous message - * wouldn't have got response. Allow to continue without polling - * for response status for priority messages. + * or reset condition is detected. Hence it is possible that a + * previous message wouldn't have got response. Allow to + * continue without polling for response status for priority + * messages. */ resp = RREG32(smu->resp_reg); dev_dbg(adev->dev, - "Sending RAS priority message %s response status: %x", + "Sending priority message %s response status: %x", smu_get_message_name(smu, msg), resp); if (resp == 0) *poll = false; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index a608cdbdada4..d588f74b98de 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -65,6 +65,32 @@ header->structure_size = sizeof(*tmp); \ } while (0) +#define smu_cmn_init_baseboard_temp_metrics(ptr, fr, cr) \ + do { \ + typecheck(struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *, \ + (ptr)); \ + struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \ + struct metrics_table_header *header = \ + (struct metrics_table_header *)tmp; \ + memset(header, 0xFF, sizeof(*tmp)); \ + header->format_revision = fr; \ + header->content_revision = cr; \ + header->structure_size = sizeof(*tmp); \ + } while (0) + +#define smu_cmn_init_gpuboard_temp_metrics(ptr, fr, cr) \ + do { \ + typecheck(struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *, \ + (ptr)); \ + struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \ + struct metrics_table_header *header = \ + (struct metrics_table_header *)tmp; \ + memset(header, 0xFF, sizeof(*tmp)); \ + header->format_revision = fr; \ + header->content_revision = cr; \ + header->structure_size = sizeof(*tmp); \ + } while (0) + extern const int link_speed[]; /* Helper to Convert from PCIE Gen 1/2/3/4/5/6 to 0.1 GT/s speed units */ |