mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
kernfs: Introduce separate rwsem to protect inode attributes.
Right now a global per-fs rwsem (kernfs_rwsem) synchronizes multiple kernfs operations. On a large system with few hundred CPUs and few hundred applications simultaneoulsy trying to access sysfs, this results in multiple sys_open(s) contending on kernfs_rwsem via kernfs_iop_permission and kernfs_dop_revalidate. For example on a system with 384 cores, if I run 200 instances of an application which is mostly executing the following loop: for (int loop = 0; loop <100 ; loop++) { for (int port_num = 1; port_num < 2; port_num++) { for (int gid_index = 0; gid_index < 254; gid_index++ ) { char ret_buf[64], ret_buf_lo[64]; char gid_file_path[1024]; int ret_len; int ret_fd; ssize_t ret_rd; ub4 i, saved_errno; memset(ret_buf, 0, sizeof(ret_buf)); memset(gid_file_path, 0, sizeof(gid_file_path)); ret_len = snprintf(gid_file_path, sizeof(gid_file_path), "/sys/class/infiniband/%s/ports/%d/gids/%d", dev_name, port_num, gid_index); ret_fd = open(gid_file_path, O_RDONLY | O_CLOEXEC); if (ret_fd < 0) { printf("Failed to open %s\n", gid_file_path); continue; } /* Read the GID */ ret_rd = read(ret_fd, ret_buf, 40); if (ret_rd == -1) { printf("Failed to read from file %s, errno: %u\n", gid_file_path, saved_errno); continue; } close(ret_fd); } } I see contention around kernfs_rwsem as follows: path_openat | |----link_path_walk.part.0.constprop.0 | | | |--49.92%--inode_permission | | | | | --48.69%--kernfs_iop_permission | | | | | |--18.16%--down_read | | | | | |--15.38%--up_read | | | | | --14.58%--_raw_spin_lock | | | | | ----- | | | |--29.08%--walk_component | | | | | --29.02%--lookup_fast | | | | | |--24.26%--kernfs_dop_revalidate | | | | | | | |--14.97%--down_read | | | | | | | --9.01%--up_read | | | | | --4.74%--__d_lookup | | | | | --4.64%--_raw_spin_lock | | | | | ---- Having a separate per-fs rwsem to protect kernfs inode attributes, will avoid the above mentioned contention and result in better performance as can bee seen below: path_openat | |----link_path_walk.part.0.constprop.0 | | | | | |--27.06%--inode_permission | | | | | --25.84%--kernfs_iop_permission | | | | | |--9.29%--up_read | | | | | |--8.19%--down_read | | | | | --7.89%--_raw_spin_lock | | | | | ---- | | | |--22.42%--walk_component | | | | | --22.36%--lookup_fast | | | | | |--16.07%--__d_lookup | | | | | | | --16.01%--_raw_spin_lock | | | | | | | ---- | | | | | --6.28%--kernfs_dop_revalidate | | | | | |--3.76%--down_read | | | | | --2.26%--up_read As can be seen from the above data the overhead due to both kerfs_iop_permission and kernfs_dop_revalidate have gone down and this also reduces overall run time of the earlier mentioned loop. Signed-off-by: Imran Khan <imran.f.khan@oracle.com> Link: https://lore.kernel.org/r/20230309110932.2889010-2-imran.f.khan@oracle.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
02fe26f253
commit
9caf696142
@ -770,12 +770,15 @@ int kernfs_add_one(struct kernfs_node *kn)
|
||||
goto out_unlock;
|
||||
|
||||
/* Update timestamps on the parent */
|
||||
down_write(&root->kernfs_iattr_rwsem);
|
||||
|
||||
ps_iattr = parent->iattr;
|
||||
if (ps_iattr) {
|
||||
ktime_get_real_ts64(&ps_iattr->ia_ctime);
|
||||
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
|
||||
}
|
||||
|
||||
up_write(&root->kernfs_iattr_rwsem);
|
||||
up_write(&root->kernfs_rwsem);
|
||||
|
||||
/*
|
||||
@ -940,6 +943,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
|
||||
|
||||
idr_init(&root->ino_idr);
|
||||
init_rwsem(&root->kernfs_rwsem);
|
||||
init_rwsem(&root->kernfs_iattr_rwsem);
|
||||
INIT_LIST_HEAD(&root->supers);
|
||||
|
||||
/*
|
||||
@ -1462,11 +1466,14 @@ static void __kernfs_remove(struct kernfs_node *kn)
|
||||
pos->parent ? pos->parent->iattr : NULL;
|
||||
|
||||
/* update timestamps on the parent */
|
||||
down_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
|
||||
|
||||
if (ps_iattr) {
|
||||
ktime_get_real_ts64(&ps_iattr->ia_ctime);
|
||||
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
|
||||
}
|
||||
|
||||
up_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
|
||||
kernfs_put(pos);
|
||||
}
|
||||
|
||||
|
@ -101,9 +101,9 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
|
||||
int ret;
|
||||
struct kernfs_root *root = kernfs_root(kn);
|
||||
|
||||
down_write(&root->kernfs_rwsem);
|
||||
down_write(&root->kernfs_iattr_rwsem);
|
||||
ret = __kernfs_setattr(kn, iattr);
|
||||
up_write(&root->kernfs_rwsem);
|
||||
up_write(&root->kernfs_iattr_rwsem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -119,7 +119,7 @@ int kernfs_iop_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
|
||||
return -EINVAL;
|
||||
|
||||
root = kernfs_root(kn);
|
||||
down_write(&root->kernfs_rwsem);
|
||||
down_write(&root->kernfs_iattr_rwsem);
|
||||
error = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
|
||||
if (error)
|
||||
goto out;
|
||||
@ -132,7 +132,7 @@ int kernfs_iop_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
|
||||
setattr_copy(&nop_mnt_idmap, inode, iattr);
|
||||
|
||||
out:
|
||||
up_write(&root->kernfs_rwsem);
|
||||
up_write(&root->kernfs_iattr_rwsem);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -189,10 +189,10 @@ int kernfs_iop_getattr(struct mnt_idmap *idmap,
|
||||
struct kernfs_node *kn = inode->i_private;
|
||||
struct kernfs_root *root = kernfs_root(kn);
|
||||
|
||||
down_read(&root->kernfs_rwsem);
|
||||
down_read(&root->kernfs_iattr_rwsem);
|
||||
kernfs_refresh_inode(kn, inode);
|
||||
generic_fillattr(&nop_mnt_idmap, inode, stat);
|
||||
up_read(&root->kernfs_rwsem);
|
||||
up_read(&root->kernfs_iattr_rwsem);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -285,10 +285,10 @@ int kernfs_iop_permission(struct mnt_idmap *idmap,
|
||||
kn = inode->i_private;
|
||||
root = kernfs_root(kn);
|
||||
|
||||
down_read(&root->kernfs_rwsem);
|
||||
down_read(&root->kernfs_iattr_rwsem);
|
||||
kernfs_refresh_inode(kn, inode);
|
||||
ret = generic_permission(&nop_mnt_idmap, inode, mask);
|
||||
up_read(&root->kernfs_rwsem);
|
||||
up_read(&root->kernfs_iattr_rwsem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -47,6 +47,7 @@ struct kernfs_root {
|
||||
|
||||
wait_queue_head_t deactivate_waitq;
|
||||
struct rw_semaphore kernfs_rwsem;
|
||||
struct rw_semaphore kernfs_iattr_rwsem;
|
||||
};
|
||||
|
||||
/* +1 to avoid triggering overflow warning when negating it */
|
||||
|
Loading…
Reference in New Issue
Block a user