drbd: make intelligent use of blkdev_issue_zeroout

drbd always wants its discard wire operations to zero the blocks, so
use blkdev_issue_zeroout with the BLKDEV_ZERO_UNMAP flag instead of
reinventing it poorly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Christoph Hellwig, 2017-04-05 19:21:21 +02:00, committed by Jens Axboe
parent 71027e97d7
commit 0dbed96a3c
4 changed files with 7 additions and 110 deletions
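
Background (not part of the commit): blkdev_issue_zeroout() gained a flags argument shortly before this change. Passing 0 lets the block layer satisfy the request with an unmapping write-zeroes where the device guarantees zeroed read-back, while BLKDEV_ZERO_NOUNMAP forces the range to be explicitly written with zeroes. A minimal sketch of the two calling conventions; the helper names here are made up for illustration:

#include <linux/blkdev.h>

/* Sketch only: how the flags argument changes behaviour. */
static int zero_range_allow_unmap(struct block_device *bdev,
				  sector_t start, sector_t nr_sectors)
{
	/* flags == 0: the device may deallocate the blocks instead of
	 * writing them, as long as reads still return zeroes */
	return blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0);
}

static int zero_range_no_unmap(struct block_device *bdev,
			       sector_t start, sector_t nr_sectors)
{
	/* BLKDEV_ZERO_NOUNMAP: the range must be zeroed explicitly */
	return blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				    BLKDEV_ZERO_NOUNMAP);
}

Since drbd's discard wire operations must read back as zeroes on the peer, the flags == 0 form is exactly what the converted call sites below rely on.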

drivers/block/drbd/drbd_debugfs.c

@@ -236,9 +236,6 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
 	seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
 	seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
 	seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
-	if (f & EE_IS_TRIM)
-		__seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim");
 	seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
 	seq_putc(m, '\n');
 }

drivers/block/drbd/drbd_int.h

@@ -437,9 +437,6 @@ enum {
 	/* is this a TRIM aka REQ_DISCARD? */
 	__EE_IS_TRIM,
-	/* our lower level cannot handle trim,
-	 * and we want to fall back to zeroout instead */
-	__EE_IS_TRIM_USE_ZEROOUT,
 
 	/* In case a barrier failed,
 	 * we need to resubmit without the barrier flag. */
@@ -482,7 +479,6 @@ enum {
 #define EE_CALL_AL_COMPLETE_IO	(1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC	(1<<__EE_MAY_SET_IN_SYNC)
 #define EE_IS_TRIM		(1<<__EE_IS_TRIM)
-#define EE_IS_TRIM_USE_ZEROOUT	(1<<__EE_IS_TRIM_USE_ZEROOUT)
 #define EE_RESUBMITTED		(1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR		(1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST		(1<<__EE_HAS_DIGEST)
@@ -1561,8 +1557,6 @@ extern void start_resync_timer_fn(unsigned long data);
 extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
 
 /* drbd_receiver.c */
-extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
-		sector_t start, unsigned int nr_sectors, bool discard);
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_ack_receiver(struct drbd_thread *thi);
 extern void drbd_send_ping_wf(struct work_struct *ws);

drivers/block/drbd/drbd_receiver.c

@@ -1448,108 +1448,14 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
 	drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
 }
 
-/*
- * We *may* ignore the discard-zeroes-data setting, if so configured.
- *
- * Assumption is that it "discard_zeroes_data=0" is only because the backend
- * may ignore partial unaligned discards.
- *
- * LVM/DM thin as of at least
- *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
- *   Library version: 1.02.93-RHEL7 (2015-01-28)
- *   Driver version:  4.29.0
- * still behaves this way.
- *
- * For unaligned (wrt. alignment and granularity) or too small discards,
- * we zero-out the initial (and/or) trailing unaligned partial chunks,
- * but discard all the aligned full chunks.
- *
- * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1".
- */
-int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard)
-{
-	struct block_device *bdev = device->ldev->backing_bdev;
-	struct request_queue *q = bdev_get_queue(bdev);
-	sector_t tmp, nr;
-	unsigned int max_discard_sectors, granularity;
-	int alignment;
-	int err = 0;
-
-	if (!discard)
-		goto zero_out;
-
-	/* Zero-sector (unknown) and one-sector granularities are the same. */
-	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
-	max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
-	max_discard_sectors -= max_discard_sectors % granularity;
-	if (unlikely(!max_discard_sectors))
-		goto zero_out;
-
-	if (nr_sectors < granularity)
-		goto zero_out;
-
-	tmp = start;
-	if (sector_div(tmp, granularity) != alignment) {
-		if (nr_sectors < 2*granularity)
-			goto zero_out;
-		/* start + gran - (start + gran - align) % gran */
-		tmp = start + granularity - alignment;
-		tmp = start + granularity - sector_div(tmp, granularity);
-
-		nr = tmp - start;
-		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO,
-				BLKDEV_ZERO_NOUNMAP);
-		nr_sectors -= nr;
-		start = tmp;
-	}
-	while (nr_sectors >= granularity) {
-		nr = min_t(sector_t, nr_sectors, max_discard_sectors);
-		err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
-		nr_sectors -= nr;
-		start += nr;
-	}
- zero_out:
-	if (nr_sectors) {
-		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
-				BLKDEV_ZERO_NOUNMAP);
-	}
-	return err != 0;
-}
-
-static bool can_do_reliable_discards(struct drbd_device *device)
-{
-	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
-	struct disk_conf *dc;
-	bool can_do;
-
-	if (!blk_queue_discard(q))
-		return false;
-
-	if (q->limits.discard_zeroes_data)
-		return true;
-
-	rcu_read_lock();
-	dc = rcu_dereference(device->ldev->disk_conf);
-	can_do = dc->discard_zeroes_if_aligned;
-	rcu_read_unlock();
-	return can_do;
-}
-
 static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
 {
-	/* If the backend cannot discard, or does not guarantee
-	 * read-back zeroes in discarded ranges, we fall back to
-	 * zero-out. Unless configuration specifically requested
-	 * otherwise. */
-	if (!can_do_reliable_discards(device))
-		peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
-
-	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
-			peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT)))
+	struct block_device *bdev = device->ldev->backing_bdev;
+
+	if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9,
+			GFP_NOIO, 0))
 		peer_req->flags |= EE_WAS_ERROR;
 	drbd_endio_write_sec_final(peer_req);
 }

drivers/block/drbd/drbd_req.c

@@ -1148,10 +1148,10 @@ static int drbd_process_write_request(struct drbd_request *req)
 
 static void drbd_process_discard_req(struct drbd_request *req)
 {
-	int err = drbd_issue_discard_or_zero_out(req->device,
-			req->i.sector, req->i.size >> 9, true);
-
-	if (err)
+	struct block_device *bdev = req->device->ldev->backing_bdev;
+
+	if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
+			GFP_NOIO, 0))
 		req->private_bio->bi_error = -EIO;
 	bio_endio(req->private_bio);
 }