habanalabs: add debugfs node for configuring CS timeout

Command submission timeout is currently determined during driver
loading time. As some environments requires this timeout to be
modified in runtime, we introduce a new debugfs node that controls
the timeout value without the need to reload the driver.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2021-09-02 10:18:01 +03:00 committed by Oded Gabbay
parent 511c1957de
commit 4be9fb5303
2 changed files with 57 additions and 0 deletions

View File

@ -226,6 +226,12 @@ Description: Gets the state dump occurring on a CS timeout or failure.
Writing an integer X discards X state dumps, so that the
next read would return X+1-st newest state dump.
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
Date: Sep 2021
KernelVersion: 5.16
Contact: obitton@habana.ai
Description: Sets the command submission timeout value in seconds.
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6

View File

@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
return count;
}
static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
char tmp_buf[200];
ssize_t rc;
if (*ppos)
return 0;
sprintf(tmp_buf, "%d\n",
jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
strlen(tmp_buf) + 1);
return rc;
}
static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u32 value;
ssize_t rc;
rc = kstrtouint_from_user(buf, count, 10, &value);
if (rc)
return rc;
if (value)
hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
return count;
}
static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
.write = hl_state_dump_write
};
static const struct file_operations hl_timeout_locked_fops = {
.owner = THIS_MODULE,
.read = hl_timeout_locked_read,
.write = hl_timeout_locked_write
};
static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_state_dump_fops);
debugfs_create_file("timeout_locked",
0644,
dev_entry->root,
dev_entry,
&hl_timeout_locked_fops);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,