From a34d1166b47c8497cffda4da7c14182cb3420362 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 18 Apr 2019 13:51:53 +0800 Subject: [PATCH] drm/amd/powerplay: expose current hotspot and memory temperatures V2 Two new hwmon interfaces(temp2_input and temp3_input) are added. They are supported on SOC15 dGPUs only. - V2: correct thermal sensor output Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 45 +++++++++++++++---- .../gpu/drm/amd/include/kgd_pp_interface.h | 3 ++ .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 +++++ .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 19 ++++++++ .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 20 ++++++++- 5 files changed, 90 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 7093b4efc3a7..00ca8ec9845f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1382,6 +1382,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; + int channel = to_sensor_dev_attr(attr)->index; int r, temp, size = sizeof(temp); /* Can't get temperature when the card is off */ @@ -1389,11 +1390,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* get the temperature */ - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, - (void *)&temp, &size); - if (r) - return r; + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + /* get current junction temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_EDGE: + /* get current edge temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_MEM: + /* get current memory temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + } return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -2041,7 +2063,8 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * hwmon interfaces for GPU temperature: * - * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius + * - temp2_input and temp3_input are supported on SOC15 dGPUs only * * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only @@ -2098,13 +2121,15 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * */ -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION); static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); @@ -2134,8 +2159,10 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_crit.dev_attr.attr, &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, &sensor_dev_attr_temp2_crit.dev_attr.attr, &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, &sensor_dev_attr_temp3_crit.dev_attr.attr, &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, &sensor_dev_attr_temp1_emergency.dev_attr.attr, @@ -2272,7 +2299,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || - attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr)) + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_input.dev_attr.attr || + attr == &sensor_dev_attr_temp3_input.dev_attr.attr)) return 0; return effective_mode; diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2b579ba9b685..a8bf8e90ceeb 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -111,6 +111,9 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_GPU_LOAD, AMDGPU_PP_SENSOR_GFX_MCLK, AMDGPU_PP_SENSOR_GPU_TEMP, + AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP, + AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + AMDGPU_PP_SENSOR_MEM_TEMP, AMDGPU_PP_SENSOR_VCE_POWER, AMDGPU_PP_SENSOR_UVD_POWER, AMDGPU_PP_SENSOR_GPU_POWER, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 1422bc4e45d1..d5d0db456021 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -3785,6 +3785,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, *((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot); + *((uint32_t *)value) = smum_get_argument(hwmgr) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM); + *((uint32_t *)value) = smum_get_argument(hwmgr) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; *size = 4; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index aeeeaa79056c..8d2865b72c7b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -1338,6 +1338,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, void *value, int *size) { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); + SmuMetrics_t metrics_table; int ret = 0; switch (idx) { @@ -1360,6 +1361,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, *((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureHotspot * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureHBM * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; *size = 4; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 3a9629c907bb..91e26f8b3758 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2138,10 +2138,28 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, if (!ret) *size = 4; break; - case AMDGPU_PP_SENSOR_GPU_TEMP: + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: *((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_EDGE_TEMP: + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureEdge * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureHBM * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; *size = 4;