2017-03-17 06:18:50 +00:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef _BCACHEFS_FS_H
|
|
|
|
#define _BCACHEFS_FS_H
|
|
|
|
|
2018-07-17 18:12:42 +00:00
|
|
|
#include "inode.h"
|
2017-03-17 06:18:50 +00:00
|
|
|
#include "opts.h"
|
|
|
|
#include "str_hash.h"
|
|
|
|
#include "quota_types.h"
|
2022-11-04 17:25:57 +00:00
|
|
|
#include "two_state_shared_lock.h"
|
2017-03-17 06:18:50 +00:00
|
|
|
|
|
|
|
#include <linux/seqlock.h>
|
|
|
|
#include <linux/stat.h>
|
|
|
|
|
|
|
|
struct bch_inode_info {
|
|
|
|
struct inode v;
|
2023-03-15 15:53:51 +00:00
|
|
|
struct list_head ei_vfs_inode_list;
|
2020-12-03 19:27:20 +00:00
|
|
|
unsigned long ei_flags;
|
2017-03-17 06:18:50 +00:00
|
|
|
|
|
|
|
struct mutex ei_update_lock;
|
|
|
|
u64 ei_quota_reserved;
|
|
|
|
unsigned long ei_last_dirtied;
|
2022-11-04 17:25:57 +00:00
|
|
|
two_state_lock_t ei_pagecache_lock;
|
2017-03-17 06:18:50 +00:00
|
|
|
|
|
|
|
struct mutex ei_quota_lock;
|
|
|
|
struct bch_qid ei_qid;
|
|
|
|
|
2021-03-16 05:33:39 +00:00
|
|
|
u32 ei_subvol;
|
|
|
|
|
bcachefs: Nocow support
This adds support for nocow mode, where we do writes in-place when
possible. Patch components:
- New boolean filesystem and inode option, nocow: note that when nocow
is enabled, data checksumming and compression are implicitly disabled
- To prevent in-place writes from racing with data moves
(data_update.c) or bucket reuse (i.e. a bucket being reused and
re-allocated while a nocow write is in flight, we have a new locking
mechanism.
Buckets can be locked for either data update or data move, using a
fixed size hash table of two_state_shared locks. We don't have any
chaining, meaning updates and moves to different buckets that hash to
the same lock will wait unnecessarily - we'll want to watch for this
becoming an issue.
- The allocator path also needs to check for in-place writes in flight
to a given bucket before giving it out: thus we add another counter
to bucket_alloc_state so we can track this.
- Fsync now may need to issue cache flushes to block devices instead of
flushing the journal. We add a device bitmask to bch_inode_info,
ei_devs_need_flush, which tracks devices that need to have flushes
issued - note that this will lead to unnecessary flushes when other
codepaths have already issued flushes, we may want to replace this with
a sequence number.
- New nocow write path: look up extents, and if they're writable write
to them - otherwise fall back to the normal COW write path.
XXX: switch to sequence numbers instead of bitmask for devs needing
journal flush
XXX: ei_quota_lock being a mutex means bch2_nocow_write_done() needs to
run in process context - see if we can improve this
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2022-11-02 21:12:00 +00:00
|
|
|
/*
|
|
|
|
* When we've been doing nocow writes we'll need to issue flushes to the
|
|
|
|
* underlying block devices
|
|
|
|
*
|
|
|
|
* XXX: a device may have had a flush issued by some other codepath. It
|
|
|
|
* would be better to keep for each device a sequence number that's
|
|
|
|
* incremented when we isusue a cache flush, and track here the sequence
|
|
|
|
* number that needs flushing.
|
|
|
|
*/
|
|
|
|
struct bch_devs_mask ei_devs_need_flush;
|
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
/* copy of inode in btree: */
|
|
|
|
struct bch_inode_unpacked ei_inode;
|
|
|
|
};
|
|
|
|
|
2022-11-04 17:25:57 +00:00
|
|
|
/*
 * Pagecache locking, built on a two-state shared lock: "add" takes state 0,
 * "block" takes state 1; holders of one state exclude holders of the other.
 * Macro argument parenthesized so callers may pass arbitrary expressions.
 */
#define bch2_pagecache_add_put(i)	bch2_two_state_unlock(&(i)->ei_pagecache_lock, 0)
#define bch2_pagecache_add_tryget(i)	bch2_two_state_trylock(&(i)->ei_pagecache_lock, 0)
#define bch2_pagecache_add_get(i)	bch2_two_state_lock(&(i)->ei_pagecache_lock, 0)

#define bch2_pagecache_block_put(i)	bch2_two_state_unlock(&(i)->ei_pagecache_lock, 1)
#define bch2_pagecache_block_get(i)	bch2_two_state_lock(&(i)->ei_pagecache_lock, 1)
|
|
|
|
|
2021-03-16 05:33:39 +00:00
|
|
|
static inline subvol_inum inode_inum(struct bch_inode_info *inode)
|
|
|
|
{
|
|
|
|
return (subvol_inum) {
|
|
|
|
.subvol = inode->ei_subvol,
|
|
|
|
.inum = inode->ei_inode.bi_inum,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2020-12-03 19:27:20 +00:00
|
|
|
/*
 * Set if we've gotten a btree error for this inode, and thus the vfs inode and
 * btree inode may be inconsistent:
 */
#define EI_INODE_ERROR			0

/*
 * Set if the inode is in a snapshot subvolume - we don't do quota accounting
 * in those:
 */
#define EI_INODE_SNAPSHOT		1
|
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
/* Cast a vfs inode to its containing bch_inode_info; NULL-safe. */
#define to_bch_ei(_inode)					\
	container_of_or_null(_inode, struct bch_inode_info, v)
|
|
|
|
|
2018-12-17 11:11:14 +00:00
|
|
|
/*
 * Compare two pointers by address - used to sort the inode argument lists of
 * bch2_lock_inodes()/bch2_unlock_inodes() for a deterministic lock order.
 */
static inline int ptrcmp(void *l, void *r)
{
	return cmp_int(l, r);
}
|
|
|
|
|
2019-06-24 22:24:38 +00:00
|
|
|
/* Which locks bch2_lock_inodes()/bch2_unlock_inodes() should take: */
enum bch_inode_lock_op {
	INODE_PAGECACHE_BLOCK	= (1U << 0),
	INODE_UPDATE_LOCK	= (1U << 1),
};
|
|
|
|
|
|
|
|
/*
 * Lock several inodes at once, in address order (via ptrcmp) to avoid
 * deadlock; duplicate arguments are only locked once. @_locks selects which
 * locks to take (enum bch_inode_lock_op bits). The leading NULL in a[] lets
 * the a[i] != a[i - 1] dedup test start at i == 1 without a special case.
 */
#define bch2_lock_inodes(_locks, ...)					\
do {									\
	struct bch_inode_info *a[] = { NULL, __VA_ARGS__ };		\
	unsigned i;							\
									\
	bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp);			\
									\
	for (i = 1; i < ARRAY_SIZE(a); i++)				\
		if (a[i] != a[i - 1]) {					\
			if ((_locks) & INODE_PAGECACHE_BLOCK)		\
				bch2_pagecache_block_get(a[i]);\
			if ((_locks) & INODE_UPDATE_LOCK)		\
				mutex_lock_nested(&a[i]->ei_update_lock, i);\
		}							\
} while (0)
|
|
|
|
|
2019-06-24 22:24:38 +00:00
|
|
|
/*
 * Counterpart to bch2_lock_inodes(): releases the locks selected by @_locks
 * on the given inodes, deduplicating repeated arguments the same way.
 */
#define bch2_unlock_inodes(_locks, ...)					\
do {									\
	struct bch_inode_info *a[] = { NULL, __VA_ARGS__ };		\
	unsigned i;							\
									\
	bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp);			\
									\
	for (i = 1; i < ARRAY_SIZE(a); i++)				\
		if (a[i] != a[i - 1]) {					\
			if ((_locks) & INODE_PAGECACHE_BLOCK)		\
				bch2_pagecache_block_put(a[i]);\
			if ((_locks) & INODE_UPDATE_LOCK)		\
				mutex_unlock(&a[i]->ei_update_lock);	\
		}							\
} while (0)
|
2018-12-17 11:11:14 +00:00
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
/* Get the bch_inode_info backing an open file. */
static inline struct bch_inode_info *file_bch_inode(struct file *file)
{
	return to_bch_ei(file_inode(file));
}
|
|
|
|
|
2018-12-17 10:31:49 +00:00
|
|
|
static inline bool inode_attr_changing(struct bch_inode_info *dir,
|
|
|
|
struct bch_inode_info *inode,
|
|
|
|
enum inode_opt_id id)
|
|
|
|
{
|
|
|
|
return !(inode->ei_inode.bi_fields_set & (1 << id)) &&
|
|
|
|
bch2_inode_opt_get(&dir->ei_inode, id) !=
|
|
|
|
bch2_inode_opt_get(&inode->ei_inode, id);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool inode_attrs_changing(struct bch_inode_info *dir,
|
|
|
|
struct bch_inode_info *inode)
|
|
|
|
{
|
|
|
|
unsigned id;
|
|
|
|
|
|
|
|
for (id = 0; id < Inode_opt_nr; id++)
|
|
|
|
if (inode_attr_changing(dir, inode, id))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
struct bch_inode_unpacked;
|
|
|
|
|
|
|
|
#ifndef NO_BCACHEFS_FS
|
|
|
|
|
2021-03-16 04:28:17 +00:00
|
|
|
struct bch_inode_info *
|
|
|
|
__bch2_create(struct mnt_idmap *, struct bch_inode_info *,
|
2021-03-17 03:28:43 +00:00
|
|
|
struct dentry *, umode_t, dev_t, subvol_inum, unsigned);
|
2021-03-16 04:28:17 +00:00
|
|
|
|
2018-12-17 10:43:00 +00:00
|
|
|
int bch2_fs_quota_transfer(struct bch_fs *,
|
|
|
|
struct bch_inode_info *,
|
|
|
|
struct bch_qid,
|
|
|
|
unsigned,
|
|
|
|
enum quota_acct_mode);
|
|
|
|
|
2018-12-19 13:43:01 +00:00
|
|
|
static inline int bch2_set_projid(struct bch_fs *c,
|
|
|
|
struct bch_inode_info *inode,
|
|
|
|
u32 projid)
|
|
|
|
{
|
|
|
|
struct bch_qid qid = inode->ei_qid;
|
|
|
|
|
|
|
|
qid.q[QTYP_PRJ] = projid;
|
|
|
|
|
|
|
|
return bch2_fs_quota_transfer(c, inode, qid,
|
|
|
|
1 << QTYP_PRJ,
|
|
|
|
KEY_TYPE_QUOTA_PREALLOC);
|
|
|
|
}
|
|
|
|
|
2021-03-16 05:33:39 +00:00
|
|
|
struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum);
|
2018-12-17 11:11:14 +00:00
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
/* returns 0 if we want to do the update, or error is passed up */
|
2023-08-12 14:47:45 +00:00
|
|
|
typedef int (*inode_set_fn)(struct btree_trans *,
|
|
|
|
struct bch_inode_info *,
|
2017-03-17 06:18:50 +00:00
|
|
|
struct bch_inode_unpacked *, void *);
|
|
|
|
|
2021-11-06 04:03:40 +00:00
|
|
|
void bch2_inode_update_after_write(struct btree_trans *,
|
2017-03-17 06:18:50 +00:00
|
|
|
struct bch_inode_info *,
|
|
|
|
struct bch_inode_unpacked *,
|
|
|
|
unsigned);
|
2018-07-17 18:12:42 +00:00
|
|
|
int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
|
|
|
|
inode_set_fn, void *, unsigned);
|
2017-03-17 06:18:50 +00:00
|
|
|
|
2021-06-15 02:29:54 +00:00
|
|
|
int bch2_setattr_nonsize(struct mnt_idmap *,
|
|
|
|
struct bch_inode_info *,
|
|
|
|
struct iattr *);
|
2021-10-11 16:03:19 +00:00
|
|
|
int __bch2_unlink(struct inode *, struct dentry *, bool);
|
2021-06-15 02:29:54 +00:00
|
|
|
|
2022-03-29 19:48:45 +00:00
|
|
|
void bch2_evict_subvolume_inodes(struct bch_fs *, snapshot_id_list *);
|
2021-10-28 20:24:39 +00:00
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
void bch2_vfs_exit(void);
|
|
|
|
int bch2_vfs_init(void);
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2023-09-23 23:07:16 +00:00
|
|
|
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
|
2023-07-09 18:18:28 +00:00
|
|
|
|
2021-10-28 20:24:39 +00:00
|
|
|
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
|
2022-03-29 19:48:45 +00:00
|
|
|
snapshot_id_list *s) {}
|
2017-03-17 06:18:50 +00:00
|
|
|
static inline void bch2_vfs_exit(void) {}
|
|
|
|
static inline int bch2_vfs_init(void) { return 0; }
|
|
|
|
|
|
|
|
#endif /* NO_BCACHEFS_FS */
|
|
|
|
|
|
|
|
#endif /* _BCACHEFS_FS_H */
|