2017-03-17 06:18:50 +00:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef _BCACHEFS_BTREE_IO_H
|
|
|
|
#define _BCACHEFS_BTREE_IO_H
|
|
|
|
|
2020-01-07 18:29:32 +00:00
|
|
|
#include "bkey_methods.h"
|
2017-03-17 06:18:50 +00:00
|
|
|
#include "bset.h"
|
2019-01-13 21:02:22 +00:00
|
|
|
#include "btree_locking.h"
|
2020-08-05 03:10:08 +00:00
|
|
|
#include "checksum.h"
|
2017-03-17 06:18:50 +00:00
|
|
|
#include "extents.h"
|
|
|
|
#include "io_types.h"
|
|
|
|
|
|
|
|
/*
 * Forward declarations: these types are only used through pointers in this
 * header, so their full definitions are not needed here.
 */
struct bch_fs;
struct btree_write;
struct btree;
struct btree_iter;
struct btree_node_read_all;
|
2017-03-17 06:18:50 +00:00
|
|
|
|
2020-11-09 18:01:52 +00:00
|
|
|
static inline bool btree_node_dirty(struct btree *b)
|
|
|
|
{
|
|
|
|
return test_bit(BTREE_NODE_dirty, &b->flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void set_btree_node_dirty(struct bch_fs *c, struct btree *b)
|
|
|
|
{
|
|
|
|
if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
|
|
|
|
atomic_inc(&c->btree_cache.dirty);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
|
|
|
|
{
|
|
|
|
if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
|
|
|
|
atomic_dec(&c->btree_cache.dirty);
|
|
|
|
}
|
|
|
|
|
2021-07-10 17:44:42 +00:00
|
|
|
static inline unsigned btree_ptr_sectors_written(struct bkey_i *k)
|
|
|
|
{
|
|
|
|
return k->k.type == KEY_TYPE_btree_ptr_v2
|
|
|
|
? le16_to_cpu(bkey_i_to_btree_ptr_v2(k)->v.sectors_written)
|
|
|
|
: 0;
|
|
|
|
}
|
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
struct btree_read_bio {
|
|
|
|
struct bch_fs *c;
|
2021-05-22 03:57:37 +00:00
|
|
|
struct btree *b;
|
|
|
|
struct btree_node_read_all *ra;
|
2017-03-17 06:18:50 +00:00
|
|
|
u64 start_time;
|
|
|
|
unsigned have_ioref:1;
|
2021-05-22 03:57:37 +00:00
|
|
|
unsigned idx:7;
|
2018-10-02 15:03:39 +00:00
|
|
|
struct extent_ptr_decoded pick;
|
2017-03-17 06:18:50 +00:00
|
|
|
struct work_struct work;
|
|
|
|
struct bio bio;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct btree_write_bio {
|
|
|
|
struct work_struct work;
|
2021-04-06 19:28:34 +00:00
|
|
|
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
|
2020-07-25 19:07:37 +00:00
|
|
|
void *data;
|
2021-07-10 17:44:42 +00:00
|
|
|
unsigned data_bytes;
|
|
|
|
unsigned sector_offset;
|
2017-03-17 06:18:50 +00:00
|
|
|
struct bch_write_bio wbio;
|
|
|
|
};
|
|
|
|
|
2021-07-11 03:03:15 +00:00
|
|
|
/*
 * Btree node I/O locking and wait helpers; defined outside this header.
 * NOTE(review): exact blocking semantics are not visible here - see the
 * definitions.
 */
void bch2_btree_node_io_unlock(struct btree *);
void bch2_btree_node_io_lock(struct btree *);
void __bch2_btree_node_wait_on_read(struct btree *);
void __bch2_btree_node_wait_on_write(struct btree *);
void bch2_btree_node_wait_on_read(struct btree *);
void bch2_btree_node_wait_on_write(struct btree *);
|
2017-03-17 06:18:50 +00:00
|
|
|
|
|
|
|
static inline bool btree_node_may_write(struct btree *b)
|
|
|
|
{
|
|
|
|
return list_empty_careful(&b->write_blocked) &&
|
2019-01-13 21:02:22 +00:00
|
|
|
(!b->written || !b->will_make_reachable);
|
2017-03-17 06:18:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Mode argument accepted by bch2_compact_whiteouts(). */
enum compact_mode {
	/*
	 * Passed by bch2_maybe_compact_whiteouts() once some bset satisfies
	 * should_compact_bset_lazy().
	 */
	COMPACT_LAZY,
	COMPACT_ALL,
};
|
|
|
|
|
2019-12-13 18:08:37 +00:00
|
|
|
/* Defined outside this header; takes the filesystem, the node and a mode. */
bool bch2_compact_whiteouts(struct bch_fs *, struct btree *,
			    enum compact_mode);
|
2017-03-17 06:18:50 +00:00
|
|
|
|
2019-12-13 18:08:37 +00:00
|
|
|
static inline bool should_compact_bset_lazy(struct btree *b,
|
|
|
|
struct bset_tree *t)
|
2017-03-17 06:18:50 +00:00
|
|
|
{
|
2019-10-19 23:03:23 +00:00
|
|
|
unsigned total_u64s = bset_u64s(t);
|
2019-12-13 18:08:37 +00:00
|
|
|
unsigned dead_u64s = bset_dead_u64s(b, t);
|
2017-03-17 06:18:50 +00:00
|
|
|
|
2019-10-19 23:03:23 +00:00
|
|
|
return dead_u64s > 64 && dead_u64s * 3 > total_u64s;
|
2017-03-17 06:18:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b)
|
|
|
|
{
|
|
|
|
struct bset_tree *t;
|
|
|
|
|
|
|
|
for_each_bset(b, t)
|
|
|
|
if (should_compact_bset_lazy(b, t))
|
2019-12-13 18:08:37 +00:00
|
|
|
return bch2_compact_whiteouts(c, b, COMPACT_LAZY);
|
2017-03-17 06:18:50 +00:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-08-05 03:10:08 +00:00
|
|
|
static inline struct nonce btree_nonce(struct bset *i, unsigned offset)
|
|
|
|
{
|
|
|
|
return (struct nonce) {{
|
|
|
|
[0] = cpu_to_le32(offset),
|
|
|
|
[1] = ((__le32 *) &i->seq)[0],
|
|
|
|
[2] = ((__le32 *) &i->seq)[1],
|
|
|
|
[3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
2022-02-19 05:42:12 +00:00
|
|
|
static inline int bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
|
2020-08-05 03:10:08 +00:00
|
|
|
{
|
|
|
|
struct nonce nonce = btree_nonce(i, offset);
|
2022-02-19 05:42:12 +00:00
|
|
|
int ret;
|
2020-08-05 03:10:08 +00:00
|
|
|
|
|
|
|
if (!offset) {
|
|
|
|
struct btree_node *bn = container_of(i, struct btree_node, keys);
|
|
|
|
unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
|
|
|
|
|
2022-02-19 05:42:12 +00:00
|
|
|
ret = bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
|
|
|
|
&bn->flags, bytes);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2020-08-05 03:10:08 +00:00
|
|
|
|
|
|
|
nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
|
|
|
|
}
|
|
|
|
|
2022-02-19 05:42:12 +00:00
|
|
|
return bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
|
|
|
|
vstruct_end(i) - (void *) i->_data);
|
2020-08-05 03:10:08 +00:00
|
|
|
}
|
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
/* Node sorting, key trimming and aux-tree setup; defined outside this header. */
void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *);

void bch2_btree_node_drop_keys_outside_node(struct btree *);

void bch2_btree_build_aux_trees(struct btree *);
void bch2_btree_init_next(struct btree_trans *, struct btree *);
|
2017-03-17 06:18:50 +00:00
|
|
|
|
2021-02-02 22:08:54 +00:00
|
|
|
/*
 * Btree node read entry points; defined outside this header.
 * NOTE(review): the meaning of the bool and unsigned arguments is not visible
 * from here - see the definitions.
 */
int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
			      struct btree *, bool);
void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
			 const struct bkey_i *, unsigned);
|
|
|
|
|
|
|
|
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
|
|
|
|
struct btree_write *);
|
|
|
|
|
2021-07-11 20:41:14 +00:00
|
|
|
void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool);
|
2017-03-17 06:18:50 +00:00
|
|
|
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
|
|
|
|
|
|
|
|
void bch2_btree_node_write(struct bch_fs *, struct btree *,
|
|
|
|
enum six_lock_type);
|
|
|
|
|
2020-02-09 00:06:31 +00:00
|
|
|
static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
|
|
|
|
enum six_lock_type lock_held)
|
2017-03-17 06:18:50 +00:00
|
|
|
{
|
2021-07-11 20:41:14 +00:00
|
|
|
if (b->written &&
|
|
|
|
btree_node_need_write(b) &&
|
|
|
|
btree_node_may_write(b) &&
|
|
|
|
!btree_node_write_in_flight(b))
|
|
|
|
bch2_btree_node_write(c, b, lock_held);
|
2017-03-17 06:18:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * bch2_btree_node_write_cond - conditionally flag a node as needing a write,
 * then write it if appropriate.
 *
 * @_c:   filesystem (struct bch_fs *)
 * @_b:   btree node (struct btree *)
 * @cond: expression re-evaluated on every iteration of the cmpxchg loop
 *
 * If the node's dirty bit is set and @cond is true, the need_write bit is
 * OR-ed into (_b)->flags with a cmpxchg() retry loop, so concurrent updates
 * to the flags word are not lost.  If either check fails the loop is left
 * without modifying the flags.  In both cases btree_node_write_if_need() is
 * then called with SIX_LOCK_read.
 *
 * The masks are built with 1UL so that they have the same width as the
 * unsigned long flags word; shifting a plain int would be undefined or
 * sign-extend for bit 31 and above.
 *
 * Note: the locals old, new and v are visible to @cond.
 */
#define bch2_btree_node_write_cond(_c, _b, cond)			\
do {									\
	unsigned long old, new, v = READ_ONCE((_b)->flags);		\
									\
	do {								\
		old = new = v;						\
									\
		if (!(old & (1UL << BTREE_NODE_dirty)) || !(cond))	\
			break;						\
									\
		new |= (1UL << BTREE_NODE_need_write);			\
	} while ((v = cmpxchg(&(_b)->flags, old, new)) != old);	\
									\
	btree_node_write_if_need(_c, _b, SIX_LOCK_read);		\
} while (0)
|
|
|
|
|
|
|
|
/* Flush and debug-output helpers; defined outside this header. */
void bch2_btree_flush_all_reads(struct bch_fs *);
void bch2_btree_flush_all_writes(struct bch_fs *);
void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
|
2017-03-17 06:18:50 +00:00
|
|
|
|
2020-01-07 18:29:32 +00:00
|
|
|
static inline void compat_bformat(unsigned level, enum btree_id btree_id,
|
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 22:02:16 +00:00
|
|
|
unsigned version, unsigned big_endian,
|
|
|
|
int write, struct bkey_format *f)
|
2020-01-07 18:29:32 +00:00
|
|
|
{
|
|
|
|
if (version < bcachefs_metadata_version_inode_btree_change &&
|
2021-02-21 00:27:37 +00:00
|
|
|
btree_id == BTREE_ID_inodes) {
|
2020-01-07 18:29:32 +00:00
|
|
|
swap(f->bits_per_field[BKEY_FIELD_INODE],
|
|
|
|
f->bits_per_field[BKEY_FIELD_OFFSET]);
|
|
|
|
swap(f->field_offset[BKEY_FIELD_INODE],
|
|
|
|
f->field_offset[BKEY_FIELD_OFFSET]);
|
|
|
|
}
|
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 22:02:16 +00:00
|
|
|
|
|
|
|
if (version < bcachefs_metadata_version_snapshot &&
|
|
|
|
(level || btree_type_has_snapshots(btree_id))) {
|
|
|
|
u64 max_packed =
|
|
|
|
~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
|
|
|
|
|
|
|
|
f->field_offset[BKEY_FIELD_SNAPSHOT] = write
|
|
|
|
? 0
|
|
|
|
: U32_MAX - max_packed;
|
|
|
|
}
|
2020-01-07 18:29:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void compat_bpos(unsigned level, enum btree_id btree_id,
|
|
|
|
unsigned version, unsigned big_endian,
|
|
|
|
int write, struct bpos *p)
|
|
|
|
{
|
|
|
|
if (big_endian != CPU_BIG_ENDIAN)
|
|
|
|
bch2_bpos_swab(p);
|
|
|
|
|
|
|
|
if (version < bcachefs_metadata_version_inode_btree_change &&
|
2021-02-21 00:27:37 +00:00
|
|
|
btree_id == BTREE_ID_inodes)
|
2020-01-07 18:29:32 +00:00
|
|
|
swap(p->inode, p->offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
|
|
|
|
unsigned version, unsigned big_endian,
|
|
|
|
int write,
|
|
|
|
struct btree_node *bn)
|
|
|
|
{
|
|
|
|
if (version < bcachefs_metadata_version_inode_btree_change &&
|
|
|
|
btree_node_type_is_extents(btree_id) &&
|
2021-03-04 21:20:16 +00:00
|
|
|
bpos_cmp(bn->min_key, POS_MIN) &&
|
2020-01-07 18:29:32 +00:00
|
|
|
write)
|
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 22:02:16 +00:00
|
|
|
bn->min_key = bpos_nosnap_predecessor(bn->min_key);
|
|
|
|
|
|
|
|
if (version < bcachefs_metadata_version_snapshot &&
|
|
|
|
write)
|
|
|
|
bn->max_key.snapshot = 0;
|
2020-01-07 18:29:32 +00:00
|
|
|
|
|
|
|
compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
|
|
|
|
compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
|
|
|
|
|
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 22:02:16 +00:00
|
|
|
if (version < bcachefs_metadata_version_snapshot &&
|
|
|
|
!write)
|
|
|
|
bn->max_key.snapshot = U32_MAX;
|
|
|
|
|
2020-01-07 18:29:32 +00:00
|
|
|
if (version < bcachefs_metadata_version_inode_btree_change &&
|
|
|
|
btree_node_type_is_extents(btree_id) &&
|
2021-03-04 21:20:16 +00:00
|
|
|
bpos_cmp(bn->min_key, POS_MIN) &&
|
2020-01-07 18:29:32 +00:00
|
|
|
!write)
|
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controlls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and alsways U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2021-03-24 22:02:16 +00:00
|
|
|
bn->min_key = bpos_nosnap_successor(bn->min_key);
|
2020-01-07 18:29:32 +00:00
|
|
|
}
|
|
|
|
|
2017-03-17 06:18:50 +00:00
|
|
|
#endif /* _BCACHEFS_BTREE_IO_H */
|