mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
bcachefs: switch to rhashtable for vfs inodes hash
The standard VFS inode hash table suffers from painful lock contention; switching to an rhashtable is long overdue.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
88d2ae0e6e
commit
112d21fd1a
@ -361,7 +361,7 @@ retry:
|
|||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
acl = _acl;
|
acl = _acl;
|
||||||
|
|
||||||
ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?:
|
ret = bch2_subvol_is_ro_trans(trans, inode->ei_inum.subvol) ?:
|
||||||
bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
|
bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
|
||||||
BTREE_ITER_intent);
|
BTREE_ITER_intent);
|
||||||
if (ret)
|
if (ret)
|
||||||
|
@ -1023,6 +1023,7 @@ struct bch_fs {
|
|||||||
/* fs.c */
|
/* fs.c */
|
||||||
struct list_head vfs_inodes_list;
|
struct list_head vfs_inodes_list;
|
||||||
struct mutex vfs_inodes_lock;
|
struct mutex vfs_inodes_lock;
|
||||||
|
struct rhashtable vfs_inodes_table;
|
||||||
|
|
||||||
/* VFS IO PATH - fs-io.c */
|
/* VFS IO PATH - fs-io.c */
|
||||||
struct bio_set writepage_bioset;
|
struct bio_set writepage_bioset;
|
||||||
|
@ -486,7 +486,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
|
|||||||
op->nr_replicas = nr_replicas;
|
op->nr_replicas = nr_replicas;
|
||||||
op->res.nr_replicas = nr_replicas;
|
op->res.nr_replicas = nr_replicas;
|
||||||
op->write_point = writepoint_hashed(inode->ei_last_dirtied);
|
op->write_point = writepoint_hashed(inode->ei_last_dirtied);
|
||||||
op->subvol = inode->ei_subvol;
|
op->subvol = inode->ei_inum.subvol;
|
||||||
op->pos = POS(inode->v.i_ino, sector);
|
op->pos = POS(inode->v.i_ino, sector);
|
||||||
op->end_io = bch2_writepage_io_done;
|
op->end_io = bch2_writepage_io_done;
|
||||||
op->devs_need_flush = &inode->ei_devs_need_flush;
|
op->devs_need_flush = &inode->ei_devs_need_flush;
|
||||||
|
@ -500,7 +500,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
|
|||||||
dio->op.target = dio->op.opts.foreground_target;
|
dio->op.target = dio->op.opts.foreground_target;
|
||||||
dio->op.write_point = writepoint_hashed((unsigned long) current);
|
dio->op.write_point = writepoint_hashed((unsigned long) current);
|
||||||
dio->op.nr_replicas = dio->op.opts.data_replicas;
|
dio->op.nr_replicas = dio->op.opts.data_replicas;
|
||||||
dio->op.subvol = inode->ei_subvol;
|
dio->op.subvol = inode->ei_inum.subvol;
|
||||||
dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
|
dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
|
||||||
dio->op.devs_need_flush = &inode->ei_devs_need_flush;
|
dio->op.devs_need_flush = &inode->ei_devs_need_flush;
|
||||||
|
|
||||||
|
@ -267,7 +267,7 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode,
|
|||||||
* XXX: we're doing two index lookups when we end up reading the
|
* XXX: we're doing two index lookups when we end up reading the
|
||||||
* folio
|
* folio
|
||||||
*/
|
*/
|
||||||
ret = range_has_data(c, inode->ei_subvol,
|
ret = range_has_data(c, inode->ei_inum.subvol,
|
||||||
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),
|
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),
|
||||||
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS));
|
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS));
|
||||||
if (ret <= 0)
|
if (ret <= 0)
|
||||||
@ -618,7 +618,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
|
|||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
|
|
||||||
ret = bch2_subvolume_get_snapshot(trans,
|
ret = bch2_subvolume_get_snapshot(trans,
|
||||||
inode->ei_subvol, &snapshot);
|
inode->ei_inum.subvol, &snapshot);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto bkey_err;
|
goto bkey_err;
|
||||||
|
|
||||||
@ -823,7 +823,7 @@ static int quota_reserve_range(struct bch_inode_info *inode,
|
|||||||
retry:
|
retry:
|
||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
|
|
||||||
ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
|
ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
|
@ -100,7 +100,7 @@ static int bch2_ioc_setflags(struct bch_fs *c,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mutex_lock(&inode->ei_update_lock);
|
mutex_lock(&inode->ei_update_lock);
|
||||||
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
|
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
|
||||||
bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
|
bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
|
||||||
ATTR_CTIME);
|
ATTR_CTIME);
|
||||||
mutex_unlock(&inode->ei_update_lock);
|
mutex_unlock(&inode->ei_update_lock);
|
||||||
@ -184,7 +184,7 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mutex_lock(&inode->ei_update_lock);
|
mutex_lock(&inode->ei_update_lock);
|
||||||
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
|
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
|
||||||
bch2_set_projid(c, inode, fa.fsx_projid) ?:
|
bch2_set_projid(c, inode, fa.fsx_projid) ?:
|
||||||
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
|
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
|
||||||
ATTR_CTIME);
|
ATTR_CTIME);
|
||||||
|
205
fs/bcachefs/fs.c
205
fs/bcachefs/fs.c
@ -108,7 +108,7 @@ retry:
|
|||||||
goto retry;
|
goto retry;
|
||||||
|
|
||||||
bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
|
bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
|
||||||
"%s: inode %u:%llu not found when updating",
|
"%s: inode %llu:%llu not found when updating",
|
||||||
bch2_err_str(ret),
|
bch2_err_str(ret),
|
||||||
inode_inum(inode).subvol,
|
inode_inum(inode).subvol,
|
||||||
inode_inum(inode).inum);
|
inode_inum(inode).inum);
|
||||||
@ -152,50 +152,95 @@ int bch2_fs_quota_transfer(struct bch_fs *c,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_iget5_test(struct inode *vinode, void *p)
|
static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
|
||||||
{
|
{
|
||||||
struct bch_inode_info *inode = to_bch_ei(vinode);
|
return a.subvol == b.subvol && a.inum == b.inum;
|
||||||
subvol_inum *inum = p;
|
|
||||||
|
|
||||||
return inode->ei_subvol == inum->subvol &&
|
|
||||||
inode->ei_inode.bi_inum == inum->inum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_iget5_set(struct inode *vinode, void *p)
|
static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
|
||||||
|
const void *obj)
|
||||||
{
|
{
|
||||||
struct bch_inode_info *inode = to_bch_ei(vinode);
|
const struct bch_inode_info *inode = obj;
|
||||||
subvol_inum *inum = p;
|
const subvol_inum *v = arg->key;
|
||||||
|
|
||||||
inode->v.i_ino = inum->inum;
|
return !subvol_inum_eq(inode->ei_inum, *v);
|
||||||
inode->ei_subvol = inum->subvol;
|
|
||||||
inode->ei_inode.bi_inum = inum->inum;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned bch2_inode_hash(subvol_inum inum)
|
/*
 * rhashtable parameters for the per-filesystem VFS inode table
 * (c->vfs_inodes_table): entries are struct bch_inode_info, linked through
 * their ->hash member and keyed by their ->ei_inum (subvolume, inode number)
 * pair.  Key equality is decided by bch2_vfs_inode_cmp_fn().
 */
static const struct rhashtable_params bch2_vfs_inodes_params = {
|
||||||
|
.head_offset = offsetof(struct bch_inode_info, hash),
|
||||||
|
.key_offset = offsetof(struct bch_inode_info, ei_inum),
|
||||||
|
.key_len = sizeof(subvol_inum), /* whole struct is the key; NOTE(review): presumably why subvol_inum must have no padding - confirm */
|
||||||
|
.obj_cmpfn = bch2_vfs_inode_cmp_fn,
|
||||||
|
.automatic_shrinking = true,
|
||||||
|
};
|
||||||
|
|
||||||
|
static void __wait_on_freeing_inode(struct inode *inode)
|
||||||
{
|
{
|
||||||
return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
|
wait_queue_head_t *wq;
|
||||||
|
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
|
||||||
|
wq = bit_waitqueue(&inode->i_state, __I_NEW);
|
||||||
|
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
|
||||||
|
spin_unlock(&inode->i_lock);
|
||||||
|
schedule();
|
||||||
|
finish_wait(wq, &wait.wq_entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
|
struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
|
||||||
{
|
{
|
||||||
return to_bch_ei(ilookup5_nowait(c->vfs_sb,
|
return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
|
||||||
bch2_inode_hash(inum),
|
|
||||||
bch2_iget5_test,
|
|
||||||
&inum));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode)
|
static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
|
||||||
{
|
{
|
||||||
subvol_inum inum = inode_inum(inode);
|
struct bch_inode_info *inode;
|
||||||
struct bch_inode_info *old = to_bch_ei(inode_insert5(&inode->v,
|
repeat:
|
||||||
bch2_inode_hash(inum),
|
inode = __bch2_inode_hash_find(c, inum);
|
||||||
bch2_iget5_test,
|
if (inode) {
|
||||||
bch2_iget5_set,
|
spin_lock(&inode->v.i_lock);
|
||||||
&inum));
|
if (!test_bit(EI_INODE_HASHED, &inode->ei_flags)) {
|
||||||
BUG_ON(!old);
|
spin_unlock(&inode->v.i_lock);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) {
|
||||||
|
__wait_on_freeing_inode(&inode->v);
|
||||||
|
goto repeat;
|
||||||
|
}
|
||||||
|
__iget(&inode->v);
|
||||||
|
spin_unlock(&inode->v.i_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
return inode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Remove @inode from @c's VFS inode rhashtable, if it is currently in it.
 *
 * EI_INODE_HASHED is tested and cleared while holding the inode's i_lock;
 * the table removal itself happens after the lock is dropped, and only if
 * the flag was set, so calling this on an inode that is not hashed does
 * nothing.
 */
static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode)
|
||||||
|
{
|
||||||
|
spin_lock(&inode->v.i_lock);
|
||||||
|
bool remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags);
|
||||||
|
spin_unlock(&inode->v.i_lock);
|
||||||
|
|
||||||
|
if (remove) {
|
||||||
|
int ret = rhashtable_remove_fast(&c->vfs_inodes_table,
|
||||||
|
&inode->hash, bch2_vfs_inodes_params);
|
||||||
|
BUG_ON(ret); /* the flag said the inode was hashed, so a failed removal is treated as fatal */
|
||||||
|
inode->v.i_hash.pprev = NULL; /* NOTE(review): presumably undoes inode_fake_hash() from the insert path - confirm */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bch_inode_info *inode)
|
||||||
|
{
|
||||||
|
struct bch_inode_info *old = inode;
|
||||||
|
|
||||||
|
set_bit(EI_INODE_HASHED, &inode->ei_flags);
|
||||||
|
retry:
|
||||||
|
if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,
|
||||||
|
&inode->hash,
|
||||||
|
bch2_vfs_inodes_params))) {
|
||||||
|
old = bch2_inode_hash_find(c, inode->ei_inum);
|
||||||
|
if (!old)
|
||||||
|
goto retry;
|
||||||
|
|
||||||
|
clear_bit(EI_INODE_HASHED, &inode->ei_flags);
|
||||||
|
|
||||||
if (unlikely(old != inode)) {
|
|
||||||
/*
|
/*
|
||||||
* bcachefs doesn't use I_NEW; we have no use for it since we
|
* bcachefs doesn't use I_NEW; we have no use for it since we
|
||||||
* only insert fully created inodes in the inode hash table. But
|
* only insert fully created inodes in the inode hash table. But
|
||||||
@ -211,16 +256,13 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
|
|||||||
discard_new_inode(&inode->v);
|
discard_new_inode(&inode->v);
|
||||||
inode = old;
|
inode = old;
|
||||||
} else {
|
} else {
|
||||||
|
inode_fake_hash(&inode->v);
|
||||||
|
|
||||||
|
inode_sb_list_add(&inode->v);
|
||||||
|
|
||||||
mutex_lock(&c->vfs_inodes_lock);
|
mutex_lock(&c->vfs_inodes_lock);
|
||||||
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
|
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
|
||||||
mutex_unlock(&c->vfs_inodes_lock);
|
mutex_unlock(&c->vfs_inodes_lock);
|
||||||
/*
|
|
||||||
* Again, I_NEW makes no sense for bcachefs. This is only needed
|
|
||||||
* for clearing I_NEW, but since the inode was already fully
|
|
||||||
* created and initialized we didn't actually want
|
|
||||||
* inode_insert5() to set it for us.
|
|
||||||
*/
|
|
||||||
unlock_new_inode(&inode->v);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return inode;
|
return inode;
|
||||||
@ -285,11 +327,7 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
|
|||||||
|
|
||||||
struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
|
struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
|
||||||
{
|
{
|
||||||
struct bch_inode_info *inode =
|
struct bch_inode_info *inode = bch2_inode_hash_find(c, inum);
|
||||||
to_bch_ei(ilookup5_nowait(c->vfs_sb,
|
|
||||||
bch2_inode_hash(inum),
|
|
||||||
bch2_iget5_test,
|
|
||||||
&inum));
|
|
||||||
if (inode)
|
if (inode)
|
||||||
return &inode->v;
|
return &inode->v;
|
||||||
|
|
||||||
@ -303,7 +341,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
|
|||||||
PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
|
PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
|
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
|
||||||
inode = bch2_inode_insert(c, inode);
|
inode = bch2_inode_hash_insert(c, inode);
|
||||||
}
|
}
|
||||||
bch2_trans_put(trans);
|
bch2_trans_put(trans);
|
||||||
|
|
||||||
@ -351,7 +389,7 @@ __bch2_create(struct mnt_idmap *idmap,
|
|||||||
retry:
|
retry:
|
||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
|
|
||||||
ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
|
ret = bch2_subvol_is_ro_trans(trans, dir->ei_inum.subvol) ?:
|
||||||
bch2_create_trans(trans,
|
bch2_create_trans(trans,
|
||||||
inode_inum(dir), &dir_u, &inode_u,
|
inode_inum(dir), &dir_u, &inode_u,
|
||||||
!(flags & BCH_CREATE_TMPFILE)
|
!(flags & BCH_CREATE_TMPFILE)
|
||||||
@ -365,7 +403,7 @@ retry:
|
|||||||
if (unlikely(ret))
|
if (unlikely(ret))
|
||||||
goto err_before_quota;
|
goto err_before_quota;
|
||||||
|
|
||||||
inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
|
inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol;
|
||||||
inum.inum = inode_u.bi_inum;
|
inum.inum = inode_u.bi_inum;
|
||||||
|
|
||||||
ret = bch2_subvolume_get(trans, inum.subvol, true,
|
ret = bch2_subvolume_get(trans, inum.subvol, true,
|
||||||
@ -396,7 +434,7 @@ err_before_quota:
|
|||||||
* bch2_trans_exit() and dropping locks, else we could race with another
|
* bch2_trans_exit() and dropping locks, else we could race with another
|
||||||
* thread pulling the inode in and modifying it:
|
* thread pulling the inode in and modifying it:
|
||||||
*/
|
*/
|
||||||
inode = bch2_inode_insert(c, inode);
|
inode = bch2_inode_hash_insert(c, inode);
|
||||||
bch2_trans_put(trans);
|
bch2_trans_put(trans);
|
||||||
err:
|
err:
|
||||||
posix_acl_release(default_acl);
|
posix_acl_release(default_acl);
|
||||||
@ -436,11 +474,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
struct bch_inode_info *inode =
|
struct bch_inode_info *inode = bch2_inode_hash_find(c, inum);
|
||||||
to_bch_ei(ilookup5_nowait(c->vfs_sb,
|
|
||||||
bch2_inode_hash(inum),
|
|
||||||
bch2_iget5_test,
|
|
||||||
&inum));
|
|
||||||
if (inode)
|
if (inode)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
@ -470,7 +504,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
|
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
|
||||||
inode = bch2_inode_insert(c, inode);
|
inode = bch2_inode_hash_insert(c, inode);
|
||||||
out:
|
out:
|
||||||
bch2_trans_iter_exit(trans, &dirent_iter);
|
bch2_trans_iter_exit(trans, &dirent_iter);
|
||||||
printbuf_exit(&buf);
|
printbuf_exit(&buf);
|
||||||
@ -557,8 +591,8 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
|
|||||||
|
|
||||||
lockdep_assert_held(&inode->v.i_rwsem);
|
lockdep_assert_held(&inode->v.i_rwsem);
|
||||||
|
|
||||||
ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
|
ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:
|
||||||
bch2_subvol_is_ro(c, inode->ei_subvol) ?:
|
bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
|
||||||
__bch2_link(c, inode, dir, dentry);
|
__bch2_link(c, inode, dir, dentry);
|
||||||
if (unlikely(ret))
|
if (unlikely(ret))
|
||||||
return bch2_err_class(ret);
|
return bch2_err_class(ret);
|
||||||
@ -614,7 +648,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
|
|||||||
struct bch_inode_info *dir= to_bch_ei(vdir);
|
struct bch_inode_info *dir= to_bch_ei(vdir);
|
||||||
struct bch_fs *c = dir->v.i_sb->s_fs_info;
|
struct bch_fs *c = dir->v.i_sb->s_fs_info;
|
||||||
|
|
||||||
int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
|
int ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:
|
||||||
__bch2_unlink(vdir, dentry, false);
|
__bch2_unlink(vdir, dentry, false);
|
||||||
return bch2_err_class(ret);
|
return bch2_err_class(ret);
|
||||||
}
|
}
|
||||||
@ -697,8 +731,8 @@ static int bch2_rename2(struct mnt_idmap *idmap,
|
|||||||
|
|
||||||
trans = bch2_trans_get(c);
|
trans = bch2_trans_get(c);
|
||||||
|
|
||||||
ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
|
ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?:
|
||||||
bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
|
bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
@ -899,7 +933,7 @@ static int bch2_getattr(struct mnt_idmap *idmap,
|
|||||||
stat->blksize = block_bytes(c);
|
stat->blksize = block_bytes(c);
|
||||||
stat->blocks = inode->v.i_blocks;
|
stat->blocks = inode->v.i_blocks;
|
||||||
|
|
||||||
stat->subvol = inode->ei_subvol;
|
stat->subvol = inode->ei_inum.subvol;
|
||||||
stat->result_mask |= STATX_SUBVOL;
|
stat->result_mask |= STATX_SUBVOL;
|
||||||
|
|
||||||
if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) {
|
if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) {
|
||||||
@ -941,7 +975,7 @@ static int bch2_setattr(struct mnt_idmap *idmap,
|
|||||||
|
|
||||||
lockdep_assert_held(&inode->v.i_rwsem);
|
lockdep_assert_held(&inode->v.i_rwsem);
|
||||||
|
|
||||||
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
|
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
|
||||||
setattr_prepare(idmap, dentry, iattr);
|
setattr_prepare(idmap, dentry, iattr);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
@ -1053,7 +1087,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
|
|||||||
retry:
|
retry:
|
||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
|
|
||||||
ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
|
ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
@ -1173,7 +1207,7 @@ static int bch2_open(struct inode *vinode, struct file *file)
|
|||||||
struct bch_inode_info *inode = to_bch_ei(vinode);
|
struct bch_inode_info *inode = to_bch_ei(vinode);
|
||||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||||
|
|
||||||
int ret = bch2_subvol_is_ro(c, inode->ei_subvol);
|
int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -1305,8 +1339,8 @@ static int bcachefs_fid_valid(int fh_len, int fh_type)
|
|||||||
static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
|
static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
|
||||||
{
|
{
|
||||||
return (struct bcachefs_fid) {
|
return (struct bcachefs_fid) {
|
||||||
.inum = inode->ei_inode.bi_inum,
|
.inum = inode->ei_inum.inum,
|
||||||
.subvol = inode->ei_subvol,
|
.subvol = inode->ei_inum.subvol,
|
||||||
.gen = inode->ei_inode.bi_generation,
|
.gen = inode->ei_inode.bi_generation,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -1391,7 +1425,7 @@ static struct dentry *bch2_get_parent(struct dentry *child)
|
|||||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||||
subvol_inum parent_inum = {
|
subvol_inum parent_inum = {
|
||||||
.subvol = inode->ei_inode.bi_parent_subvol ?:
|
.subvol = inode->ei_inode.bi_parent_subvol ?:
|
||||||
inode->ei_subvol,
|
inode->ei_inum.subvol,
|
||||||
.inum = inode->ei_inode.bi_dir,
|
.inum = inode->ei_inode.bi_dir,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1427,7 +1461,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
|
|||||||
retry:
|
retry:
|
||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
|
|
||||||
ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
|
ret = bch2_subvolume_get_snapshot(trans, dir->ei_inum.subvol, &snapshot);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
@ -1458,8 +1492,7 @@ retry:
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
if (target.subvol == inode->ei_subvol &&
|
if (subvol_inum_eq(target, inode->ei_inum))
|
||||||
target.inum == inode->ei_inode.bi_inum)
|
|
||||||
goto found;
|
goto found;
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
@ -1480,8 +1513,7 @@ retry:
|
|||||||
if (ret)
|
if (ret)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (target.subvol == inode->ei_subvol &&
|
if (subvol_inum_eq(target, inode->ei_inum))
|
||||||
target.inum == inode->ei_inode.bi_inum)
|
|
||||||
goto found;
|
goto found;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1518,7 +1550,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
|
|||||||
struct bch_inode_unpacked *bi,
|
struct bch_inode_unpacked *bi,
|
||||||
struct bch_subvolume *subvol)
|
struct bch_subvolume *subvol)
|
||||||
{
|
{
|
||||||
bch2_iget5_set(&inode->v, &inum);
|
inode->v.i_ino = inum.inum;
|
||||||
|
inode->ei_inum = inum;
|
||||||
|
inode->ei_inode.bi_inum = inum.inum;
|
||||||
bch2_inode_update_after_write(trans, inode, bi, ~0);
|
bch2_inode_update_after_write(trans, inode, bi, ~0);
|
||||||
|
|
||||||
inode->v.i_blocks = bi->bi_sectors;
|
inode->v.i_blocks = bi->bi_sectors;
|
||||||
@ -1530,7 +1564,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
|
|||||||
inode->ei_flags = 0;
|
inode->ei_flags = 0;
|
||||||
inode->ei_quota_reserved = 0;
|
inode->ei_quota_reserved = 0;
|
||||||
inode->ei_qid = bch_qid(bi);
|
inode->ei_qid = bch_qid(bi);
|
||||||
inode->ei_subvol = inum.subvol;
|
|
||||||
|
|
||||||
if (BCH_SUBVOLUME_SNAP(subvol))
|
if (BCH_SUBVOLUME_SNAP(subvol))
|
||||||
set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
|
set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
|
||||||
@ -1597,6 +1630,17 @@ static void bch2_evict_inode(struct inode *vinode)
|
|||||||
{
|
{
|
||||||
struct bch_fs *c = vinode->i_sb->s_fs_info;
|
struct bch_fs *c = vinode->i_sb->s_fs_info;
|
||||||
struct bch_inode_info *inode = to_bch_ei(vinode);
|
struct bch_inode_info *inode = to_bch_ei(vinode);
|
||||||
|
bool delete = !inode->v.i_nlink && !is_bad_inode(&inode->v);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* evict() has waited for outstanding writeback, we'll do no more IO
|
||||||
|
* through this inode: it's safe to remove from VFS inode hashtable here
|
||||||
|
*
|
||||||
|
* Do that now so that other threads aren't blocked from pulling it back
|
||||||
|
* in, there's no reason for them to be:
|
||||||
|
*/
|
||||||
|
if (!delete)
|
||||||
|
bch2_inode_hash_remove(c, inode);
|
||||||
|
|
||||||
truncate_inode_pages_final(&inode->v.i_data);
|
truncate_inode_pages_final(&inode->v.i_data);
|
||||||
|
|
||||||
@ -1604,12 +1648,18 @@ static void bch2_evict_inode(struct inode *vinode)
|
|||||||
|
|
||||||
BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
|
BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
|
||||||
|
|
||||||
if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
|
if (delete) {
|
||||||
bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
|
bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
|
||||||
KEY_TYPE_QUOTA_WARN);
|
KEY_TYPE_QUOTA_WARN);
|
||||||
bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
|
bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
|
||||||
KEY_TYPE_QUOTA_WARN);
|
KEY_TYPE_QUOTA_WARN);
|
||||||
bch2_inode_rm(c, inode_inum(inode));
|
bch2_inode_rm(c, inode_inum(inode));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are deleting, we need it present in the vfs hash table
|
||||||
|
* so that fsck can check if unlinked inodes are still open:
|
||||||
|
*/
|
||||||
|
bch2_inode_hash_remove(c, inode);
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_lock(&c->vfs_inodes_lock);
|
mutex_lock(&c->vfs_inodes_lock);
|
||||||
@ -1639,7 +1689,7 @@ again:
|
|||||||
|
|
||||||
mutex_lock(&c->vfs_inodes_lock);
|
mutex_lock(&c->vfs_inodes_lock);
|
||||||
list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
|
list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
|
||||||
if (!snapshot_list_has_id(s, inode->ei_subvol))
|
if (!snapshot_list_has_id(s, inode->ei_inum.subvol))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!(inode->v.i_state & I_DONTCACHE) &&
|
if (!(inode->v.i_state & I_DONTCACHE) &&
|
||||||
@ -2127,6 +2177,17 @@ static int bch2_init_fs_context(struct fs_context *fc)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Tear down the per-filesystem VFS inode rhashtable.
 *
 * The table is destroyed only if its ->tbl pointer is non-NULL.
 * NOTE(review): presumably this makes the call safe when
 * bch2_fs_vfs_init() never ran or failed - confirm.
 */
void bch2_fs_vfs_exit(struct bch_fs *c)
|
||||||
|
{
|
||||||
|
if (c->vfs_inodes_table.tbl)
|
||||||
|
rhashtable_destroy(&c->vfs_inodes_table);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Initialise the per-filesystem VFS inode rhashtable using
 * bch2_vfs_inodes_params.
 *
 * Returns the result of rhashtable_init() unchanged.
 */
int bch2_fs_vfs_init(struct bch_fs *c)
|
||||||
|
{
|
||||||
|
return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params);
|
||||||
|
}
|
||||||
|
|
||||||
static struct file_system_type bcache_fs_type = {
|
static struct file_system_type bcache_fs_type = {
|
||||||
.owner = THIS_MODULE,
|
.owner = THIS_MODULE,
|
||||||
.name = "bcachefs",
|
.name = "bcachefs",
|
||||||
|
@ -13,6 +13,9 @@
|
|||||||
|
|
||||||
struct bch_inode_info {
|
struct bch_inode_info {
|
||||||
struct inode v;
|
struct inode v;
|
||||||
|
struct rhash_head hash;
|
||||||
|
subvol_inum ei_inum;
|
||||||
|
|
||||||
struct list_head ei_vfs_inode_list;
|
struct list_head ei_vfs_inode_list;
|
||||||
unsigned long ei_flags;
|
unsigned long ei_flags;
|
||||||
|
|
||||||
@ -24,8 +27,6 @@ struct bch_inode_info {
|
|||||||
struct mutex ei_quota_lock;
|
struct mutex ei_quota_lock;
|
||||||
struct bch_qid ei_qid;
|
struct bch_qid ei_qid;
|
||||||
|
|
||||||
u32 ei_subvol;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When we've been doing nocow writes we'll need to issue flushes to the
|
* When we've been doing nocow writes we'll need to issue flushes to the
|
||||||
* underlying block devices
|
* underlying block devices
|
||||||
@ -50,10 +51,7 @@ struct bch_inode_info {
|
|||||||
|
|
||||||
static inline subvol_inum inode_inum(struct bch_inode_info *inode)
|
static inline subvol_inum inode_inum(struct bch_inode_info *inode)
|
||||||
{
|
{
|
||||||
return (subvol_inum) {
|
return inode->ei_inum;
|
||||||
.subvol = inode->ei_subvol,
|
|
||||||
.inum = inode->ei_inode.bi_inum,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum);
|
struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum);
|
||||||
@ -69,6 +67,7 @@ struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum);
|
|||||||
* those:
|
* those:
|
||||||
*/
|
*/
|
||||||
#define EI_INODE_SNAPSHOT 1
|
#define EI_INODE_SNAPSHOT 1
|
||||||
|
#define EI_INODE_HASHED 2
|
||||||
|
|
||||||
#define to_bch_ei(_inode) \
|
#define to_bch_ei(_inode) \
|
||||||
container_of_or_null(_inode, struct bch_inode_info, v)
|
container_of_or_null(_inode, struct bch_inode_info, v)
|
||||||
@ -189,6 +188,9 @@ int __bch2_unlink(struct inode *, struct dentry *, bool);
|
|||||||
|
|
||||||
void bch2_evict_subvolume_inodes(struct bch_fs *, snapshot_id_list *);
|
void bch2_evict_subvolume_inodes(struct bch_fs *, snapshot_id_list *);
|
||||||
|
|
||||||
|
void bch2_fs_vfs_exit(struct bch_fs *);
|
||||||
|
int bch2_fs_vfs_init(struct bch_fs *);
|
||||||
|
|
||||||
void bch2_vfs_exit(void);
|
void bch2_vfs_exit(void);
|
||||||
int bch2_vfs_init(void);
|
int bch2_vfs_init(void);
|
||||||
|
|
||||||
@ -203,6 +205,10 @@ static inline struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, su
|
|||||||
|
|
||||||
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
|
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
|
||||||
snapshot_id_list *s) {}
|
snapshot_id_list *s) {}
|
||||||
|
|
||||||
|
static inline void bch2_fs_vfs_exit(struct bch_fs *c) {}
|
||||||
|
static inline int bch2_fs_vfs_init(struct bch_fs *c) { return 0; }
|
||||||
|
|
||||||
static inline void bch2_vfs_exit(void) {}
|
static inline void bch2_vfs_exit(void) {}
|
||||||
static inline int bch2_vfs_init(void) { return 0; }
|
static inline int bch2_vfs_init(void) { return 0; }
|
||||||
|
|
||||||
|
@ -365,7 +365,7 @@ int bch2_inode_peek(struct btree_trans *trans,
|
|||||||
subvol_inum inum, unsigned flags)
|
subvol_inum inum, unsigned flags)
|
||||||
{
|
{
|
||||||
int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
|
int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
|
||||||
bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum);
|
bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,7 +30,8 @@ struct snapshot_table {
|
|||||||
};
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
u32 subvol;
|
/* we can't have padding in this struct: */
|
||||||
|
u64 subvol;
|
||||||
u64 inum;
|
u64 inum;
|
||||||
} subvol_inum;
|
} subvol_inum;
|
||||||
|
|
||||||
|
@ -543,6 +543,7 @@ static void __bch2_fs_free(struct bch_fs *c)
|
|||||||
bch2_fs_fs_io_direct_exit(c);
|
bch2_fs_fs_io_direct_exit(c);
|
||||||
bch2_fs_fs_io_buffered_exit(c);
|
bch2_fs_fs_io_buffered_exit(c);
|
||||||
bch2_fs_fsio_exit(c);
|
bch2_fs_fsio_exit(c);
|
||||||
|
bch2_fs_vfs_exit(c);
|
||||||
bch2_fs_ec_exit(c);
|
bch2_fs_ec_exit(c);
|
||||||
bch2_fs_encryption_exit(c);
|
bch2_fs_encryption_exit(c);
|
||||||
bch2_fs_nocow_locking_exit(c);
|
bch2_fs_nocow_locking_exit(c);
|
||||||
@ -926,6 +927,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
|||||||
bch2_fs_encryption_init(c) ?:
|
bch2_fs_encryption_init(c) ?:
|
||||||
bch2_fs_compress_init(c) ?:
|
bch2_fs_compress_init(c) ?:
|
||||||
bch2_fs_ec_init(c) ?:
|
bch2_fs_ec_init(c) ?:
|
||||||
|
bch2_fs_vfs_init(c) ?:
|
||||||
bch2_fs_fsio_init(c) ?:
|
bch2_fs_fsio_init(c) ?:
|
||||||
bch2_fs_fs_io_buffered_init(c) ?:
|
bch2_fs_fs_io_buffered_init(c) ?:
|
||||||
bch2_fs_fs_io_direct_init(c);
|
bch2_fs_fs_io_direct_init(c);
|
||||||
|
@ -306,7 +306,7 @@ retry:
|
|||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
iter = (struct btree_iter) { NULL };
|
iter = (struct btree_iter) { NULL };
|
||||||
|
|
||||||
ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
|
ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user