mirror of https://github.com/torvalds/linux.git
Merge branch 'for-linus' into test
* for-linus:
  block, bfq: add requeue-request hook
  bcache: fix for data collapse after re-attaching an attached device
  bcache: return attach error when no cache set exist
  bcache: set writeback_rate_update_seconds in range [1, 60] seconds
  bcache: fix for allocator and register thread race
  bcache: set error_limit correctly
  bcache: properly set task state in bch_writeback_thread()
  bcache: fix high CPU occupancy during journal
  bcache: add journal statistic
  block: Add should_fail_bio() for bpf error injection
  blk-wbt: account flush requests correctly
commit 8525e5ff45
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3823,24 +3823,26 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 		}
 
 		/*
-		 * We exploit the bfq_finish_request hook to decrement
-		 * rq_in_driver, but bfq_finish_request will not be
-		 * invoked on this request. So, to avoid unbalance,
-		 * just start this request, without incrementing
-		 * rq_in_driver. As a negative consequence,
-		 * rq_in_driver is deceptively lower than it should be
-		 * while this request is in service. This may cause
-		 * bfq_schedule_dispatch to be invoked uselessly.
+		 * We exploit the bfq_finish_requeue_request hook to
+		 * decrement rq_in_driver, but
+		 * bfq_finish_requeue_request will not be invoked on
+		 * this request. So, to avoid unbalance, just start
+		 * this request, without incrementing rq_in_driver. As
+		 * a negative consequence, rq_in_driver is deceptively
+		 * lower than it should be while this request is in
+		 * service. This may cause bfq_schedule_dispatch to be
+		 * invoked uselessly.
 		 *
 		 * As for implementing an exact solution, the
-		 * bfq_finish_request hook, if defined, is probably
-		 * invoked also on this request. So, by exploiting
-		 * this hook, we could 1) increment rq_in_driver here,
-		 * and 2) decrement it in bfq_finish_request. Such a
-		 * solution would let the value of the counter be
-		 * always accurate, but it would entail using an extra
-		 * interface function. This cost seems higher than the
-		 * benefit, being the frequency of non-elevator-private
+		 * bfq_finish_requeue_request hook, if defined, is
+		 * probably invoked also on this request. So, by
+		 * exploiting this hook, we could 1) increment
+		 * rq_in_driver here, and 2) decrement it in
+		 * bfq_finish_requeue_request. Such a solution would
+		 * let the value of the counter be always accurate,
+		 * but it would entail using an extra interface
+		 * function. This cost seems higher than the benefit,
+		 * being the frequency of non-elevator-private
 		 * requests very low.
 		 */
 		goto start_rq;
@@ -4515,6 +4517,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
 					   unsigned int cmd_flags) {}
 #endif
 
+static void bfq_prepare_request(struct request *rq, struct bio *bio);
+
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			       bool at_head)
 {
@@ -4541,6 +4545,18 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		else
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
+		if (WARN_ON_ONCE(!bfqq)) {
+			/*
+			 * This should never happen. Most likely rq is
+			 * a requeued regular request, being
+			 * re-inserted without being first
+			 * re-prepared. Do a prepare, to avoid
+			 * failure.
+			 */
+			bfq_prepare_request(rq, rq->bio);
+			bfqq = RQ_BFQQ(rq);
+		}
+
 		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
 		 * Update bfqq, because, if a queue merge has occurred
@@ -4697,22 +4713,44 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 		bfq_schedule_dispatch(bfqd);
 }
 
-static void bfq_finish_request_body(struct bfq_queue *bfqq)
+static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
 {
 	bfqq->allocated--;
 
 	bfq_put_queue(bfqq);
 }
 
-static void bfq_finish_request(struct request *rq)
+/*
+ * Handle either a requeue or a finish for rq. The things to do are
+ * the same in both cases: all references to rq are to be dropped. In
+ * particular, rq is considered completed from the point of view of
+ * the scheduler.
+ */
+static void bfq_finish_requeue_request(struct request *rq)
 {
-	struct bfq_queue *bfqq;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	struct bfq_data *bfqd;
 
-	if (!rq->elv.icq)
+	/*
+	 * Requeue and finish hooks are invoked in blk-mq without
+	 * checking whether the involved request is actually still
+	 * referenced in the scheduler. To handle this fact, the
+	 * following two checks make this function exit in case of
+	 * spurious invocations, for which there is nothing to do.
+	 *
+	 * First, check whether rq has nothing to do with an elevator.
+	 */
+	if (unlikely(!(rq->rq_flags & RQF_ELVPRIV)))
+		return;
+
+	/*
+	 * rq either is not associated with any icq, or is an already
+	 * requeued request that has not (yet) been re-inserted into
+	 * a bfq_queue.
+	 */
+	if (!rq->elv.icq || !bfqq)
 		return;
 
-	bfqq = RQ_BFQQ(rq);
 	bfqd = bfqq->bfqd;
 
 	if (rq->rq_flags & RQF_STARTED)
@@ -4727,13 +4765,14 @@ static void bfq_finish_request(struct request *rq)
 		spin_lock_irqsave(&bfqd->lock, flags);
 
 		bfq_completed_request(bfqq, bfqd);
-		bfq_finish_request_body(bfqq);
+		bfq_finish_requeue_request_body(bfqq);
 
 		spin_unlock_irqrestore(&bfqd->lock, flags);
 	} else {
 		/*
 		 * Request rq may be still/already in the scheduler,
-		 * in which case we need to remove it. And we cannot
+		 * in which case we need to remove it (this should
+		 * never happen in case of requeue). And we cannot
 		 * defer such a check and removal, to avoid
 		 * inconsistencies in the time interval from the end
 		 * of this function to the start of the deferred work.
@@ -4748,9 +4787,26 @@ static void bfq_finish_request(struct request *rq)
 			bfqg_stats_update_io_remove(bfqq_group(bfqq),
 						    rq->cmd_flags);
 		}
-		bfq_finish_request_body(bfqq);
+		bfq_finish_requeue_request_body(bfqq);
 	}
 
+	/*
+	 * Reset private fields. In case of a requeue, this allows
+	 * this function to correctly do nothing if it is spuriously
+	 * invoked again on this same request (see the check at the
+	 * beginning of the function). Probably, a better general
+	 * design would be to prevent blk-mq from invoking the requeue
+	 * or finish hooks of an elevator, for a request that is not
+	 * referred by that elevator.
+	 *
+	 * Resetting the following fields would break the
+	 * request-insertion logic if rq is re-inserted into a bfq
+	 * internal queue, without a re-preparation. Here we assume
+	 * that re-insertions of requeued requests, without
+	 * re-preparation, can happen only for pass_through or at_head
+	 * requests (which are not re-inserted into bfq internal
+	 * queues).
+	 */
 	rq->elv.priv[0] = NULL;
 	rq->elv.priv[1] = NULL;
 }
@@ -5426,7 +5482,8 @@ static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
 		.limit_depth		= bfq_limit_depth,
 		.prepare_request	= bfq_prepare_request,
-		.finish_request		= bfq_finish_request,
+		.requeue_request	= bfq_finish_requeue_request,
+		.finish_request		= bfq_finish_requeue_request,
 		.exit_icq		= bfq_exit_icq,
 		.insert_requests	= bfq_insert_requests,
 		.dispatch_request	= bfq_dispatch_request,
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
+#include <linux/bpf.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -2083,6 +2084,14 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 	return false;
 }
 
+static noinline int should_fail_bio(struct bio *bio)
+{
+	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+		return -EIO;
+	return 0;
+}
+ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
+
 /*
  * Remap block n of partition p to block n+start(p) of the disk.
  */
@@ -2174,7 +2183,7 @@ generic_make_request_checks(struct bio *bio)
 	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
 		goto not_supported;
 
-	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+	if (should_fail_bio(bio))
 		goto end_io;
 
 	if (!bio->bi_partno) {
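
The reason should_fail_bio() is split out as a noinline function and tagged with ALLOW_ERROR_INJECTION() is that a BPF program attached to a kprobe on it may then override its return value, turning arbitrary bios into -EIO failures for testing. Below is a minimal sketch of such a program, assuming a libbpf-style build on a kernel with CONFIG_FUNCTION_ERROR_INJECTION and CONFIG_BPF_KPROBE_OVERRIDE enabled; the program and file names are illustrative, and the user-space loader is omitted.

/* fail_bio.bpf.c - sketch only: force should_fail_bio() to return -EIO
 * for every bio, which generic_make_request_checks() then treats as a
 * failed submission. Build and attach details depend on the toolchain.
 */
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("kprobe/should_fail_bio")
int inject_bio_error(struct pt_regs *ctx)
{
	/* -5 == -EIO; the override is only permitted because
	 * should_fail_bio() is on the error-injection whitelist.
	 */
	bpf_override_return(ctx, -5);
	return 0;
}

char _license[] SEC("license") = "GPL";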
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -697,7 +697,15 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
 
 static int wbt_data_dir(const struct request *rq)
 {
-	return rq_data_dir(rq);
+	const int op = req_op(rq);
+
+	if (op == REQ_OP_READ)
+		return READ;
+	else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+		return WRITE;
+
+	/* don't account */
+	return -1;
 }
 
 int wbt_init(struct request_queue *q)
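
For context on the wbt_data_dir() change above: rq_data_dir() derives the direction from the opcode's write bit, so REQ_OP_FLUSH requests did not land in the write bucket that writeback throttling tracks. The new helper counts flushes as writes and drops everything else from the accounting. The sketch below is a stand-alone user-space analogue of that classification; the enum values are illustrative, not the kernel's REQ_OP_* constants.

/* wbt_dir_demo.c - user-space sketch of the new wbt_data_dir() policy. */
#include <stdio.h>

enum demo_op { DEMO_OP_READ, DEMO_OP_WRITE, DEMO_OP_FLUSH, DEMO_OP_DISCARD };

/* Flushes are accounted as writes; anything that is neither a read nor a
 * write-like operation is not accounted at all.
 */
static int demo_data_dir(enum demo_op op)
{
	if (op == DEMO_OP_READ)
		return 0;		/* read bucket */
	if (op == DEMO_OP_WRITE || op == DEMO_OP_FLUSH)
		return 1;		/* write bucket */
	return -1;			/* don't account */
}

int main(void)
{
	printf("read    -> %d\n", demo_data_dir(DEMO_OP_READ));
	printf("write   -> %d\n", demo_data_dir(DEMO_OP_WRITE));
	printf("flush   -> %d\n", demo_data_dir(DEMO_OP_FLUSH));
	printf("discard -> %d\n", demo_data_dir(DEMO_OP_DISCARD));
	return 0;
}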
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,8 +287,10 @@ do { \
 			break; \
 \
 		mutex_unlock(&(ca)->set->bucket_lock); \
-		if (kthread_should_stop()) \
+		if (kthread_should_stop()) { \
+			set_current_state(TASK_RUNNING); \
 			return 0; \
+		} \
 \
 		schedule(); \
 		mutex_lock(&(ca)->set->bucket_lock); \
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -658,10 +658,15 @@ struct cache_set {
 	atomic_long_t		writeback_keys_done;
 	atomic_long_t		writeback_keys_failed;
 
+	atomic_long_t		reclaim;
+	atomic_long_t		flush_write;
+	atomic_long_t		retry_flush_write;
+
 	enum			{
 		ON_ERROR_UNREGISTER,
 		ON_ERROR_PANIC,
 	}			on_error;
+#define DEFAULT_IO_ERROR_LIMIT 8
 	unsigned		error_limit;
 	unsigned		error_decay;
 
@@ -675,6 +680,8 @@ struct cache_set {
 
 #define BUCKET_HASH_BITS	12
 	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
+
+	DECLARE_HEAP(struct btree *, flush_btree);
 };
 
 struct bbio {
@@ -917,7 +924,7 @@ void bcache_write_super(struct cache_set *);
 
 int bch_flash_dev_create(struct cache_set *c, uint64_t size);
 
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
+int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
 void bch_cached_dev_detach(struct cached_dev *);
 void bch_cached_dev_run(struct cached_dev *);
 void bcache_device_stop(struct bcache_device *);
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1869,14 +1869,17 @@ void bch_initial_gc_finish(struct cache_set *c)
 	 */
 	for_each_cache(ca, c, i) {
 		for_each_bucket(b, ca) {
-			if (fifo_full(&ca->free[RESERVE_PRIO]))
+			if (fifo_full(&ca->free[RESERVE_PRIO]) &&
+			    fifo_full(&ca->free[RESERVE_BTREE]))
 				break;
 
 			if (bch_can_invalidate_bucket(ca, b) &&
 			    !GC_MARK(b)) {
 				__bch_invalidate_one_bucket(ca, b);
-				fifo_push(&ca->free[RESERVE_PRIO],
-					  b - ca->buckets);
+				if (!fifo_push(&ca->free[RESERVE_PRIO],
+				   b - ca->buckets))
+					fifo_push(&ca->free[RESERVE_BTREE],
+						  b - ca->buckets);
 			}
 		}
 	}
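
The bch_initial_gc_finish() hunk above makes the initial GC pass keep going once RESERVE_PRIO is full, spilling invalidated buckets into RESERVE_BTREE instead of stopping early. Below is a small user-space sketch of that push-with-fallback pattern; the bounded-buffer type is illustrative and is not bcache's DECLARE_FIFO machinery.

/* fifo_fallback_demo.c - sketch: fill the primary reserve first, then
 * spill into the fallback reserve, stopping only when both are full.
 */
#include <stdbool.h>
#include <stdio.h>

struct bounded_buf {
	const char *name;
	size_t used, size;
	long data[8];
};

static bool buf_push(struct bounded_buf *f, long v)
{
	if (f->used == f->size)
		return false;		/* full: caller must fall back */
	f->data[f->used++] = v;
	return true;
}

int main(void)
{
	struct bounded_buf prio  = { .name = "prio",  .size = 2 };
	struct bounded_buf btree = { .name = "btree", .size = 4 };
	long bucket;

	for (bucket = 0; bucket < 8; bucket++) {
		if (buf_push(&prio, bucket))
			printf("bucket %ld -> %s reserve\n", bucket, prio.name);
		else if (buf_push(&btree, bucket))
			printf("bucket %ld -> %s reserve\n", bucket, btree.name);
		else
			break;	/* both reserves full, like the new double fifo_full() check */
	}
	return 0;
}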
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -368,6 +368,12 @@ err:
 }
 
 /* Journalling */
+#define journal_max_cmp(l, r) \
+	(fifo_idx(&c->journal.pin, btree_current_write(l)->journal) < \
+	 fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
+#define journal_min_cmp(l, r) \
+	(fifo_idx(&c->journal.pin, btree_current_write(l)->journal) > \
+	 fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
 
 static void btree_flush_write(struct cache_set *c)
 {
@@ -375,28 +381,41 @@ static void btree_flush_write(struct cache_set *c)
 	 * Try to find the btree node with that references the oldest journal
 	 * entry, best is our current candidate and is locked if non NULL:
 	 */
-	struct btree *b, *best;
-	unsigned i;
+	struct btree *b;
+	int i;
+
+	atomic_long_inc(&c->flush_write);
+
 retry:
-	best = NULL;
-
-	for_each_cached_btree(b, c, i)
-		if (btree_current_write(b)->journal) {
-			if (!best)
-				best = b;
-			else if (journal_pin_cmp(c,
-					btree_current_write(best)->journal,
-					btree_current_write(b)->journal)) {
-				best = b;
+	spin_lock(&c->journal.lock);
+	if (heap_empty(&c->flush_btree)) {
+		for_each_cached_btree(b, c, i)
+			if (btree_current_write(b)->journal) {
+				if (!heap_full(&c->flush_btree))
+					heap_add(&c->flush_btree, b,
+						 journal_max_cmp);
+				else if (journal_max_cmp(b,
+					 heap_peek(&c->flush_btree))) {
+					c->flush_btree.data[0] = b;
+					heap_sift(&c->flush_btree, 0,
+						  journal_max_cmp);
+				}
 			}
-		}
 
-	b = best;
+		for (i = c->flush_btree.used / 2 - 1; i >= 0; --i)
+			heap_sift(&c->flush_btree, i, journal_min_cmp);
+	}
+
+	b = NULL;
+	heap_pop(&c->flush_btree, b, journal_min_cmp);
+	spin_unlock(&c->journal.lock);
+
 	if (b) {
 		mutex_lock(&b->write_lock);
 		if (!btree_current_write(b)->journal) {
 			mutex_unlock(&b->write_lock);
 			/* We raced */
+			atomic_long_inc(&c->retry_flush_write);
 			goto retry;
 		}
 
@@ -476,6 +495,8 @@ static void journal_reclaim(struct cache_set *c)
 	unsigned iter, n = 0;
 	atomic_t p;
 
+	atomic_long_inc(&c->reclaim);
+
 	while (!atomic_read(&fifo_front(&c->journal.pin)))
 		fifo_pop(&c->journal.pin, p);
 
@@ -819,7 +840,8 @@ int bch_journal_alloc(struct cache_set *c)
 	j->w[0].c = c;
 	j->w[1].c = c;
 
-	if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
+	if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) ||
+	    !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
 	    !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
 	    !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)))
 		return -ENOMEM;
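
The btree_flush_write() rewrite above replaces a full scan of every cached btree node on each flush with a bounded candidate heap (flush_btree): while scanning once under c->journal.lock, a max-heap ordered by journal index keeps only the nodes pinning the oldest entries; the heap is then re-sifted into a min-heap and one node is popped per call. The stand-alone sketch below shows the same fill / re-sift / pop pattern on plain integers; the heap implementation is illustrative, not bcache's DECLARE_HEAP.

/* flush_heap_demo.c - sketch of the candidate-heap pattern: keep the CAP
 * smallest ("oldest") indices seen during one scan in a bounded max-heap,
 * then re-heapify as a min-heap and pop them oldest-first.
 */
#include <stdio.h>
#include <stddef.h>

#define CAP 4			/* bounded, like init_heap(..., 128, ...) */

static size_t used;
static int heap[CAP];

static void swap_int(int *a, int *b) { int t = *a; *a = *b; *b = t; }

/* Sift element i down; cmp(parent, child) < 0 means the child is promoted. */
static void sift(int *h, size_t n, size_t i, int (*cmp)(int, int))
{
	for (;;) {
		size_t l = 2 * i + 1, r = l + 1, m = i;

		if (l < n && cmp(h[m], h[l]) < 0)
			m = l;
		if (r < n && cmp(h[m], h[r]) < 0)
			m = r;
		if (m == i)
			break;
		swap_int(&h[i], &h[m]);
		i = m;
	}
}

static int max_cmp(int a, int b) { return (a > b) - (a < b); }	/* root = largest */
static int min_cmp(int a, int b) { return (a < b) - (a > b); }	/* root = smallest */

/* While scanning: keep only the CAP smallest values. */
static void add_candidate(int v)
{
	if (used < CAP) {
		heap[used++] = v;
		for (size_t i = used / 2; i-- > 0; )	/* rebuild; fine for a sketch */
			sift(heap, used, i, max_cmp);
	} else if (v < heap[0]) {
		heap[0] = v;				/* evict the largest kept value */
		sift(heap, used, 0, max_cmp);
	}
}

int main(void)
{
	static const int journal_idx[] = { 9, 3, 7, 1, 8, 2, 6, 5 };
	size_t i;

	for (i = 0; i < sizeof(journal_idx) / sizeof(journal_idx[0]); i++)
		add_candidate(journal_idx[i]);

	/* One-time conversion, like the journal_min_cmp re-sift in the patch. */
	for (i = used / 2; i-- > 0; )
		sift(heap, used, i, min_cmp);

	/* Later "flush" calls pop the oldest candidate first. */
	while (used) {
		printf("flush node pinning journal index %d\n", heap[0]);
		heap[0] = heap[--used];
		sift(heap, used, 0, min_cmp);
	}
	return 0;
}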
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -957,7 +957,8 @@ void bch_cached_dev_detach(struct cached_dev *dc)
 	cached_dev_put(dc);
 }
 
-int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+			  uint8_t *set_uuid)
 {
 	uint32_t rtime = cpu_to_le32(get_seconds());
 	struct uuid_entry *u;
@@ -965,7 +966,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 
 	bdevname(dc->bdev, buf);
 
-	if (memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))
+	if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
+	    (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
 		return -ENOENT;
 
 	if (dc->disk.c) {
@@ -1194,7 +1196,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
 
 	list_add(&dc->list, &uncached_devices);
 	list_for_each_entry(c, &bch_cache_sets, list)
-		bch_cached_dev_attach(dc, c);
+		bch_cached_dev_attach(dc, c, NULL);
 
 	if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
 	    BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
@@ -1553,7 +1555,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 
 	c->congested_read_threshold_us	= 2000;
 	c->congested_write_threshold_us	= 20000;
-	c->error_limit	= 8 << IO_ERROR_SHIFT;
+	c->error_limit	= DEFAULT_IO_ERROR_LIMIT;
 
 	return c;
 err:
@@ -1716,7 +1718,7 @@ static void run_cache_set(struct cache_set *c)
 		bcache_write_super(c);
 
 		list_for_each_entry_safe(dc, t, &uncached_devices, list)
-			bch_cached_dev_attach(dc, c);
+			bch_cached_dev_attach(dc, c, NULL);
 
 		flash_devs_run(c);
 
@@ -1833,6 +1835,7 @@ void bch_cache_release(struct kobject *kobj)
 static int cache_alloc(struct cache *ca)
 {
 	size_t free;
+	size_t btree_buckets;
 	struct bucket *b;
 
 	__module_get(THIS_MODULE);
@@ -1840,9 +1843,19 @@ static int cache_alloc(struct cache *ca)
 
 	bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
 
+	/*
+	 * when ca->sb.njournal_buckets is not zero, journal exists,
+	 * and in bch_journal_replay(), tree node may split,
+	 * so bucket of RESERVE_BTREE type is needed,
+	 * the worst situation is all journal buckets are valid journal,
+	 * and all the keys need to replay,
+	 * so the number of RESERVE_BTREE type buckets should be as much
+	 * as journal buckets
+	 */
+	btree_buckets = ca->sb.njournal_buckets ?: 8;
 	free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
-	if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
+	if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) ||
 	    !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -65,6 +65,9 @@ read_attribute(bset_tree_stats);
 
 read_attribute(state);
 read_attribute(cache_read_races);
+read_attribute(reclaim);
+read_attribute(flush_write);
+read_attribute(retry_flush_write);
 read_attribute(writeback_keys_done);
 read_attribute(writeback_keys_failed);
 read_attribute(io_errors);
@@ -195,7 +198,7 @@ STORE(__cached_dev)
 {
 	struct cached_dev *dc = container_of(kobj, struct cached_dev,
 					     disk.kobj);
-	ssize_t v = size;
+	ssize_t v;
 	struct cache_set *c;
 	struct kobj_uevent_env *env;
 
@@ -215,7 +218,9 @@ STORE(__cached_dev)
 	sysfs_strtoul_clamp(writeback_rate,
 			    dc->writeback_rate.rate, 1, INT_MAX);
 
-	d_strtoul_nonzero(writeback_rate_update_seconds);
+	sysfs_strtoul_clamp(writeback_rate_update_seconds,
+			    dc->writeback_rate_update_seconds,
+			    1, WRITEBACK_RATE_UPDATE_SECS_MAX);
 	d_strtoul(writeback_rate_i_term_inverse);
 	d_strtoul_nonzero(writeback_rate_p_term_inverse);
 
@@ -267,17 +272,20 @@ STORE(__cached_dev)
 	}
 
 	if (attr == &sysfs_attach) {
-		if (bch_parse_uuid(buf, dc->sb.set_uuid) < 16)
+		uint8_t set_uuid[16];
+
+		if (bch_parse_uuid(buf, set_uuid) < 16)
 			return -EINVAL;
 
+		v = -ENOENT;
 		list_for_each_entry(c, &bch_cache_sets, list) {
-			v = bch_cached_dev_attach(dc, c);
+			v = bch_cached_dev_attach(dc, c, set_uuid);
 			if (!v)
 				return size;
 		}
 
 		pr_err("Can't attach %s: cache set not found", buf);
-		size = v;
+		return v;
 	}
 
 	if (attr == &sysfs_detach && dc->disk.c)
@@ -545,6 +553,15 @@ SHOW(__bch_cache_set)
 	sysfs_print(cache_read_races,
 		    atomic_long_read(&c->cache_read_races));
 
+	sysfs_print(reclaim,
+		    atomic_long_read(&c->reclaim));
+
+	sysfs_print(flush_write,
+		    atomic_long_read(&c->flush_write));
+
+	sysfs_print(retry_flush_write,
+		    atomic_long_read(&c->retry_flush_write));
+
 	sysfs_print(writeback_keys_done,
 		    atomic_long_read(&c->writeback_keys_done));
 	sysfs_print(writeback_keys_failed,
@@ -556,7 +573,7 @@ SHOW(__bch_cache_set)
 
 	/* See count_io_errors for why 88 */
 	sysfs_print(io_error_halflife,	c->error_decay * 88);
-	sysfs_print(io_error_limit,	c->error_limit >> IO_ERROR_SHIFT);
+	sysfs_print(io_error_limit,	c->error_limit);
 
 	sysfs_hprint(congested,
 		     ((uint64_t) bch_get_congested(c)) << 9);
@@ -656,7 +673,7 @@ STORE(__bch_cache_set)
 	}
 
 	if (attr == &sysfs_io_error_limit)
-		c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
+		c->error_limit = strtoul_or_return(buf);
 
 	/* See count_io_errors() for why 88 */
 	if (attr == &sysfs_io_error_halflife)
@@ -731,6 +748,9 @@ static struct attribute *bch_cache_set_internal_files[] = {
 
 	&sysfs_bset_tree_stats,
 	&sysfs_cache_read_races,
+	&sysfs_reclaim,
+	&sysfs_flush_write,
+	&sysfs_retry_flush_write,
 	&sysfs_writeback_keys_done,
 	&sysfs_writeback_keys_failed,
 
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -112,6 +112,8 @@ do { \
 
 #define heap_full(h)	((h)->used == (h)->size)
 
+#define heap_empty(h)	((h)->used == 0)
+
 #define DECLARE_FIFO(type, name) \
 	struct { \
 		size_t front, back, size, mask; \
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -564,18 +564,21 @@ static int bch_writeback_thread(void *arg)
 
 	while (!kthread_should_stop()) {
 		down_write(&dc->writeback_lock);
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (!atomic_read(&dc->has_dirty) ||
 		    (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
 		     !dc->writeback_running)) {
 			up_write(&dc->writeback_lock);
-			set_current_state(TASK_INTERRUPTIBLE);
 
-			if (kthread_should_stop())
+			if (kthread_should_stop()) {
+				set_current_state(TASK_RUNNING);
 				return 0;
+			}
 
 			schedule();
 			continue;
 		}
+		set_current_state(TASK_RUNNING);
 
 		searched_full_index = refill_dirty(dc);
 
@@ -652,7 +655,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 	dc->writeback_rate.rate		= 1024;
 	dc->writeback_rate_minimum	= 8;
 
-	dc->writeback_rate_update_seconds = 5;
+	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
 	dc->writeback_rate_p_term_inverse = 40;
 	dc->writeback_rate_i_term_inverse = 10000;
 
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -8,6 +8,9 @@
 #define MAX_WRITEBACKS_IN_PASS  5
 #define MAX_WRITESIZE_IN_PASS   5000	/* *512b */
 
+#define WRITEBACK_RATE_UPDATE_SECS_MAX		60
+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT	5
+
 /*
  * 14 (16384ths) is chosen here as something that each backing device
  * should be a reasonable fraction of the share, and not to blow up