forked from Minki/linux
drm/amdgpu: add badpages sysfs interafce
add badpages node. it will output badpages list in format gpu pfn : gpu page size : flags example 0x00000000 : 0x00001000 : R 0x00000001 : 0x00001000 : R 0x00000002 : 0x00001000 : R 0x00000003 : 0x00001000 : R 0x00000004 : 0x00001000 : R 0x00000005 : 0x00001000 : R 0x00000006 : 0x00001000 : R 0x00000007 : 0x00001000 : P 0x00000008 : 0x00001000 : P 0x00000009 : 0x00001000 : P flags can be one of below characters R: reserved. P: pending for reserve. F: failed to reserve for some reasons. Signed-off-by: xinhui pan <xinhui.pan@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
3058770ae3
commit
466b179346
@ -90,6 +90,12 @@ struct ras_manager {
|
|||||||
struct ras_err_data err_data;
|
struct ras_err_data err_data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ras_badpage {
|
||||||
|
unsigned int bp;
|
||||||
|
unsigned int size;
|
||||||
|
unsigned int flags;
|
||||||
|
};
|
||||||
|
|
||||||
const char *ras_error_string[] = {
|
const char *ras_error_string[] = {
|
||||||
"none",
|
"none",
|
||||||
"parity",
|
"parity",
|
||||||
@ -710,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
|||||||
|
|
||||||
/* sysfs begin */
|
/* sysfs begin */
|
||||||
|
|
||||||
|
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
|
||||||
|
struct ras_badpage **bps, unsigned int *count);
|
||||||
|
|
||||||
|
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
|
||||||
|
{
|
||||||
|
switch (flags) {
|
||||||
|
case 0:
|
||||||
|
return "R";
|
||||||
|
case 1:
|
||||||
|
return "P";
|
||||||
|
case 2:
|
||||||
|
default:
|
||||||
|
return "F";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DOC: ras sysfs gpu_vram_bad_pages interface
|
||||||
|
*
|
||||||
|
* It allows user to read the bad pages of vram on the gpu through
|
||||||
|
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
|
||||||
|
*
|
||||||
|
* It outputs multiple lines, and each line stands for one gpu page.
|
||||||
|
*
|
||||||
|
* The format of one line is below,
|
||||||
|
* gpu pfn : gpu page size : flags
|
||||||
|
*
|
||||||
|
* gpu pfn and gpu page size are printed in hex format.
|
||||||
|
* flags can be one of below character,
|
||||||
|
* R: reserved, this gpu page is reserved and not able to use.
|
||||||
|
* P: pending for reserve, this gpu page is marked as bad, will be reserved
|
||||||
|
* in next window of page_reserve.
|
||||||
|
* F: unable to reserve. this gpu page can't be reserved due to some reasons.
|
||||||
|
*
|
||||||
|
* examples:
|
||||||
|
* 0x00000001 : 0x00001000 : R
|
||||||
|
* 0x00000002 : 0x00001000 : P
|
||||||
|
*/
|
||||||
|
|
||||||
|
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
|
||||||
|
struct kobject *kobj, struct bin_attribute *attr,
|
||||||
|
char *buf, loff_t ppos, size_t count)
|
||||||
|
{
|
||||||
|
struct amdgpu_ras *con =
|
||||||
|
container_of(attr, struct amdgpu_ras, badpages_attr);
|
||||||
|
struct amdgpu_device *adev = con->adev;
|
||||||
|
const unsigned int element_size =
|
||||||
|
sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
|
||||||
|
unsigned int start = (ppos + element_size - 1) / element_size;
|
||||||
|
unsigned int end = (ppos + count - 1) / element_size;
|
||||||
|
ssize_t s = 0;
|
||||||
|
struct ras_badpage *bps = NULL;
|
||||||
|
unsigned int bps_count = 0;
|
||||||
|
|
||||||
|
memset(buf, 0, count);
|
||||||
|
|
||||||
|
if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
for (; start < end && start < bps_count; start++)
|
||||||
|
s += scnprintf(&buf[s], element_size + 1,
|
||||||
|
"0x%08x : 0x%08x : %1s\n",
|
||||||
|
bps[start].bp,
|
||||||
|
bps[start].size,
|
||||||
|
amdgpu_ras_badpage_flags_str(bps[start].flags));
|
||||||
|
|
||||||
|
kfree(bps);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
|
static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
|
||||||
struct device_attribute *attr, char *buf)
|
struct device_attribute *attr, char *buf)
|
||||||
{
|
{
|
||||||
@ -750,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
|
|||||||
&con->features_attr.attr,
|
&con->features_attr.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
struct bin_attribute *bin_attrs[] = {
|
||||||
|
&con->badpages_attr,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
struct attribute_group group = {
|
struct attribute_group group = {
|
||||||
.name = "ras",
|
.name = "ras",
|
||||||
.attrs = attrs,
|
.attrs = attrs,
|
||||||
|
.bin_attrs = bin_attrs,
|
||||||
};
|
};
|
||||||
|
|
||||||
con->features_attr = (struct device_attribute) {
|
con->features_attr = (struct device_attribute) {
|
||||||
@ -762,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
|
|||||||
},
|
},
|
||||||
.show = amdgpu_ras_sysfs_features_read,
|
.show = amdgpu_ras_sysfs_features_read,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
con->badpages_attr = (struct bin_attribute) {
|
||||||
|
.attr = {
|
||||||
|
.name = "gpu_vram_bad_pages",
|
||||||
|
.mode = S_IRUGO,
|
||||||
|
},
|
||||||
|
.size = 0,
|
||||||
|
.private = NULL,
|
||||||
|
.read = amdgpu_ras_sysfs_badpages_read,
|
||||||
|
};
|
||||||
|
|
||||||
sysfs_attr_init(attrs[0]);
|
sysfs_attr_init(attrs[0]);
|
||||||
|
sysfs_bin_attr_init(bin_attrs[0]);
|
||||||
|
|
||||||
return sysfs_create_group(&adev->dev->kobj, &group);
|
return sysfs_create_group(&adev->dev->kobj, &group);
|
||||||
}
|
}
|
||||||
@ -774,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
|
|||||||
&con->features_attr.attr,
|
&con->features_attr.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
struct bin_attribute *bin_attrs[] = {
|
||||||
|
&con->badpages_attr,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
struct attribute_group group = {
|
struct attribute_group group = {
|
||||||
.name = "ras",
|
.name = "ras",
|
||||||
.attrs = attrs,
|
.attrs = attrs,
|
||||||
|
.bin_attrs = bin_attrs,
|
||||||
};
|
};
|
||||||
|
|
||||||
sysfs_remove_group(&adev->dev->kobj, &group);
|
sysfs_remove_group(&adev->dev->kobj, &group);
|
||||||
@ -1108,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
|
|||||||
/* ih end */
|
/* ih end */
|
||||||
|
|
||||||
/* recovery begin */
|
/* recovery begin */
|
||||||
|
|
||||||
|
/* return 0 on success.
|
||||||
|
* caller need free bps.
|
||||||
|
*/
|
||||||
|
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
|
||||||
|
struct ras_badpage **bps, unsigned int *count)
|
||||||
|
{
|
||||||
|
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||||
|
struct ras_err_handler_data *data;
|
||||||
|
int i = 0;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (!con || !con->eh_data || !bps || !count)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
mutex_lock(&con->recovery_lock);
|
||||||
|
data = con->eh_data;
|
||||||
|
if (!data || data->count == 0) {
|
||||||
|
*bps = NULL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
|
||||||
|
if (!*bps) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < data->count; i++) {
|
||||||
|
(*bps)[i] = (struct ras_badpage){
|
||||||
|
.bp = data->bps[i].bp,
|
||||||
|
.size = AMDGPU_GPU_PAGE_SIZE,
|
||||||
|
.flags = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (data->last_reserved <= i)
|
||||||
|
(*bps)[i].flags = 1;
|
||||||
|
else if (data->bps[i].bo == NULL)
|
||||||
|
(*bps)[i].flags = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
*count = data->count;
|
||||||
|
out:
|
||||||
|
mutex_unlock(&con->recovery_lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static void amdgpu_ras_do_recovery(struct work_struct *work)
|
static void amdgpu_ras_do_recovery(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct amdgpu_ras *ras =
|
struct amdgpu_ras *ras =
|
||||||
|
@ -93,6 +93,7 @@ struct amdgpu_ras {
|
|||||||
struct dentry *ent;
|
struct dentry *ent;
|
||||||
/* sysfs */
|
/* sysfs */
|
||||||
struct device_attribute features_attr;
|
struct device_attribute features_attr;
|
||||||
|
struct bin_attribute badpages_attr;
|
||||||
/* block array */
|
/* block array */
|
||||||
struct ras_manager *objs;
|
struct ras_manager *objs;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user