mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
for-6.12/io_uring-discard-20240913
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmbkboUQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpj7DD/oDqQ13NOHuotVbufPRDWuG6+UEaN/Pukp/ RYDWwYu/DB4v7LVWBV9COqN5jQqY2wrMpgBdZqtEnDtC7yjN6QYAT4TQdfIq/HNo NooN4ULmJzOOC6sR9MBGyzOsCbz7kmRt1nBZ7vdEXMrLXeX9JDX3bDrELf7jhKsk 84lKE/Mxs530LSzxAtN9KaOQncK5gXen4WSrZsYraU2vJFAPBkJwQGAL5pOdmsp9 NqvNE3QonPr4v99XnDJH80q44afuqffUITPjtGX52tBMO3CCUQFUpZp5fiUjfa1v Okz+SyeBE6gB7c008BGqTOgmKdQOMs3uwFDQ/xMw+pYwy+wHH4skzPP776DwAdgn C/SaVFsaXkqOXX4f+CiNJ01LmD4EOBy16LM5qE4NwLNpjQu/3EdHjNqaYfM/LCca YyQoUOsnYIRj21+oNFpKekscuEAPKG9ewyMyvfxbkk167j00lgwVwybb/2JfYvRJ i0GBY5phJnkeNUerU9SDm6RBTAjDOZ0stubTtFjugDZdrz2FmA4pBFGWjgYLiLhH 3ZCyaCAOoYW8yxxkogTzKbLx6wXb5wgS7jTHgsk+eeSSWRBTnv2sd0fn/D5m3Uw7 uBHKvauDp3zEd9MdF26QG7U6RlojEbVoyTYjnJskPsClxbch4WSpwvoEILdJRvls 1dTczxgdyw== =wlzo -----END PGP SIGNATURE----- Merge tag 'for-6.12/io_uring-discard-20240913' of git://git.kernel.dk/linux Pull io_uring async discard support from Jens Axboe: "Sitting on top of both the 6.12 block and io_uring core branches, here's support for async discard through io_uring. This allows applications to issue async discards, rather than rely on the blocking sync ioctl discards we already have. The sync support is difficult to use outside of idle/cleanup periods. 
On a real (but slow) device, testing shows the following results when compared to sync discard: qd64 sync discard: 21K IOPS, lat avg 3 msec (max 21 msec) qd64 async discard: 76K IOPS, lat avg 845 usec (max 2.2 msec) qd64 sync discard: 14K IOPS, lat avg 5 msec (max 25 msec) qd64 async discard: 56K IOPS, lat avg 1153 usec (max 3.6 msec) and synthetic null_blk testing with the same queue depth and block size settings as above shows: Type Trim size IOPS Lat avg (usec) Lat Max (usec) ============================================================== sync 4k 144K 444 20314 async 4k 1353K 47 595 sync 1M 56K 1136 21031 async 1M 94K 680 760" * tag 'for-6.12/io_uring-discard-20240913' of git://git.kernel.dk/linux: block: implement async io_uring discard cmd block: introduce blk_validate_byte_range() filemap: introduce filemap_invalidate_pages io_uring/cmd: give inline space in request to cmds io_uring/cmd: expose iowq to cmds
This commit is contained in:
commit
adfc3ded5c
@ -609,6 +609,7 @@ blk_mode_t file_to_blk_mode(struct file *file);
|
||||
int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
|
||||
loff_t lstart, loff_t lend);
|
||||
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
|
||||
int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
|
||||
|
||||
extern const struct address_space_operations def_blk_aops;
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
#include "blk.h"
|
||||
|
||||
static inline struct inode *bdev_file_inode(struct file *file)
|
||||
@ -865,6 +866,7 @@ const struct file_operations def_blk_fops = {
|
||||
.splice_read = filemap_splice_read,
|
||||
.splice_write = iter_file_splice_write,
|
||||
.fallocate = blkdev_fallocate,
|
||||
.uring_cmd = blkdev_uring_cmd,
|
||||
.fop_flags = FOP_BUFFER_RASYNC,
|
||||
};
|
||||
|
||||
|
163
block/ioctl.c
163
block/ioctl.c
@ -11,6 +11,9 @@
|
||||
#include <linux/blktrace_api.h>
|
||||
#include <linux/pr.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
#include <uapi/linux/blkdev.h>
|
||||
#include "blk.h"
|
||||
|
||||
static int blkpg_do_ioctl(struct block_device *bdev,
|
||||
@ -92,41 +95,54 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Check that [start, start + len) is a valid range from the block device's
|
||||
* perspective, including verifying that it can be correctly translated into
|
||||
* logical block addresses.
|
||||
*/
|
||||
static int blk_validate_byte_range(struct block_device *bdev,
|
||||
uint64_t start, uint64_t len)
|
||||
{
|
||||
unsigned int bs_mask = bdev_logical_block_size(bdev) - 1;
|
||||
uint64_t end;
|
||||
|
||||
if ((start | len) & bs_mask)
|
||||
return -EINVAL;
|
||||
if (!len)
|
||||
return -EINVAL;
|
||||
if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
|
||||
unsigned long arg)
|
||||
{
|
||||
unsigned int bs_mask = bdev_logical_block_size(bdev) - 1;
|
||||
uint64_t range[2], start, len, end;
|
||||
uint64_t range[2], start, len;
|
||||
struct bio *prev = NULL, *bio;
|
||||
sector_t sector, nr_sects;
|
||||
struct blk_plug plug;
|
||||
int err;
|
||||
|
||||
if (!(mode & BLK_OPEN_WRITE))
|
||||
return -EBADF;
|
||||
|
||||
if (!bdev_max_discard_sectors(bdev))
|
||||
return -EOPNOTSUPP;
|
||||
if (bdev_read_only(bdev))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
|
||||
return -EFAULT;
|
||||
|
||||
start = range[0];
|
||||
len = range[1];
|
||||
|
||||
if (!len)
|
||||
return -EINVAL;
|
||||
if ((start | len) & bs_mask)
|
||||
return -EINVAL;
|
||||
if (!bdev_max_discard_sectors(bdev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (check_add_overflow(start, len, &end) ||
|
||||
end > bdev_nr_bytes(bdev))
|
||||
return -EINVAL;
|
||||
if (!(mode & BLK_OPEN_WRITE))
|
||||
return -EBADF;
|
||||
if (bdev_read_only(bdev))
|
||||
return -EPERM;
|
||||
err = blk_validate_byte_range(bdev, start, len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
filemap_invalidate_lock(bdev->bd_mapping);
|
||||
err = truncate_bdev_range(bdev, mode, start, end - 1);
|
||||
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
@ -735,3 +751,112 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct blk_iou_cmd {
|
||||
int res;
|
||||
bool nowait;
|
||||
};
|
||||
|
||||
static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
|
||||
|
||||
if (bic->res == -EAGAIN && bic->nowait)
|
||||
io_uring_cmd_issue_blocking(cmd);
|
||||
else
|
||||
io_uring_cmd_done(cmd, bic->res, 0, issue_flags);
|
||||
}
|
||||
|
||||
static void bio_cmd_bio_end_io(struct bio *bio)
|
||||
{
|
||||
struct io_uring_cmd *cmd = bio->bi_private;
|
||||
struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
|
||||
|
||||
if (unlikely(bio->bi_status) && !bic->res)
|
||||
bic->res = blk_status_to_errno(bio->bi_status);
|
||||
|
||||
io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static int blkdev_cmd_discard(struct io_uring_cmd *cmd,
|
||||
struct block_device *bdev,
|
||||
uint64_t start, uint64_t len, bool nowait)
|
||||
{
|
||||
struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
|
||||
gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL;
|
||||
sector_t sector = start >> SECTOR_SHIFT;
|
||||
sector_t nr_sects = len >> SECTOR_SHIFT;
|
||||
struct bio *prev = NULL, *bio;
|
||||
int err;
|
||||
|
||||
if (!bdev_max_discard_sectors(bdev))
|
||||
return -EOPNOTSUPP;
|
||||
if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE))
|
||||
return -EBADF;
|
||||
if (bdev_read_only(bdev))
|
||||
return -EPERM;
|
||||
err = blk_validate_byte_range(bdev, start, len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = filemap_invalidate_pages(bdev->bd_mapping, start,
|
||||
start + len - 1, nowait);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
while (true) {
|
||||
bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp);
|
||||
if (!bio)
|
||||
break;
|
||||
if (nowait) {
|
||||
/*
|
||||
* Don't allow multi-bio non-blocking submissions as
|
||||
* subsequent bios may fail but we won't get a direct
|
||||
* indication of that. Normally, the caller should
|
||||
* retry from a blocking context.
|
||||
*/
|
||||
if (unlikely(nr_sects)) {
|
||||
bio_put(bio);
|
||||
return -EAGAIN;
|
||||
}
|
||||
bio->bi_opf |= REQ_NOWAIT;
|
||||
}
|
||||
|
||||
prev = bio_chain_and_submit(prev, bio);
|
||||
}
|
||||
if (unlikely(!prev))
|
||||
return -EAGAIN;
|
||||
if (unlikely(nr_sects))
|
||||
bic->res = -EAGAIN;
|
||||
|
||||
prev->bi_private = cmd;
|
||||
prev->bi_end_io = bio_cmd_bio_end_io;
|
||||
submit_bio(prev);
|
||||
return -EIOCBQUEUED;
|
||||
}
|
||||
|
||||
int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host);
|
||||
struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
|
||||
const struct io_uring_sqe *sqe = cmd->sqe;
|
||||
u32 cmd_op = cmd->cmd_op;
|
||||
uint64_t start, len;
|
||||
|
||||
if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len ||
|
||||
sqe->rw_flags || sqe->file_index))
|
||||
return -EINVAL;
|
||||
|
||||
bic->res = 0;
|
||||
bic->nowait = issue_flags & IO_URING_F_NONBLOCK;
|
||||
|
||||
start = READ_ONCE(sqe->addr);
|
||||
len = READ_ONCE(sqe->addr3);
|
||||
|
||||
switch (cmd_op) {
|
||||
case BLOCK_URING_CMD_DISCARD:
|
||||
return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -23,6 +23,15 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
|
||||
return sqe->cmd;
|
||||
}
|
||||
|
||||
static inline void io_uring_cmd_private_sz_check(size_t cmd_sz)
|
||||
{
|
||||
BUILD_BUG_ON(cmd_sz > sizeof_field(struct io_uring_cmd, pdu));
|
||||
}
|
||||
#define io_uring_cmd_to_pdu(cmd, pdu_type) ( \
|
||||
io_uring_cmd_private_sz_check(sizeof(pdu_type)), \
|
||||
((pdu_type *)&(cmd)->pdu) \
|
||||
)
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
@ -48,6 +57,9 @@ void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags);
|
||||
|
||||
/* Execute the request from a blocking context */
|
||||
void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
|
||||
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
@ -67,6 +79,9 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -32,6 +32,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
|
||||
pgoff_t start, pgoff_t end);
|
||||
int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
|
||||
void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
|
||||
int filemap_invalidate_pages(struct address_space *mapping,
|
||||
loff_t pos, loff_t end, bool nowait);
|
||||
|
||||
int write_inode_now(struct inode *, int sync);
|
||||
int filemap_fdatawrite(struct address_space *);
|
||||
|
14
include/uapi/linux/blkdev.h
Normal file
14
include/uapi/linux/blkdev.h
Normal file
@ -0,0 +1,14 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
#ifndef _UAPI_LINUX_BLKDEV_H
|
||||
#define _UAPI_LINUX_BLKDEV_H
|
||||
|
||||
#include <linux/ioctl.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* io_uring block file commands, see IORING_OP_URING_CMD.
|
||||
* It's a different number space from ioctl(), reuse the block's code 0x12.
|
||||
*/
|
||||
#define BLOCK_URING_CMD_DISCARD _IO(0x12, 0)
|
||||
|
||||
#endif
|
@ -533,6 +533,17 @@ static void io_queue_iowq(struct io_kiocb *req)
|
||||
io_queue_linked_timeout(link);
|
||||
}
|
||||
|
||||
static void io_req_queue_iowq_tw(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
io_queue_iowq(req);
|
||||
}
|
||||
|
||||
void io_req_queue_iowq(struct io_kiocb *req)
|
||||
{
|
||||
req->io_task_work.func = io_req_queue_iowq_tw;
|
||||
io_req_task_work_add(req);
|
||||
}
|
||||
|
||||
static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
|
||||
{
|
||||
while (!list_empty(&ctx->defer_list)) {
|
||||
|
@ -94,6 +94,7 @@ int io_uring_alloc_task_context(struct task_struct *task,
|
||||
|
||||
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
|
||||
int start, int end);
|
||||
void io_req_queue_iowq(struct io_kiocb *req);
|
||||
|
||||
int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
|
||||
|
@ -277,6 +277,13 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
|
||||
|
||||
void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
|
||||
{
|
||||
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
|
||||
|
||||
io_req_queue_iowq(req);
|
||||
}
|
||||
|
||||
static inline int io_uring_cmd_getsockopt(struct socket *sock,
|
||||
struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
|
17
mm/filemap.c
17
mm/filemap.c
@ -2712,14 +2712,12 @@ int kiocb_write_and_wait(struct kiocb *iocb, size_t count)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kiocb_write_and_wait);
|
||||
|
||||
int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
|
||||
int filemap_invalidate_pages(struct address_space *mapping,
|
||||
loff_t pos, loff_t end, bool nowait)
|
||||
{
|
||||
struct address_space *mapping = iocb->ki_filp->f_mapping;
|
||||
loff_t pos = iocb->ki_pos;
|
||||
loff_t end = pos + count - 1;
|
||||
int ret;
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (nowait) {
|
||||
/* we could block if there are any pages in the range */
|
||||
if (filemap_range_has_page(mapping, pos, end))
|
||||
return -EAGAIN;
|
||||
@ -2738,6 +2736,15 @@ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
|
||||
return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
|
||||
end >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
|
||||
{
|
||||
struct address_space *mapping = iocb->ki_filp->f_mapping;
|
||||
|
||||
return filemap_invalidate_pages(mapping, iocb->ki_pos,
|
||||
iocb->ki_pos + count - 1,
|
||||
iocb->ki_flags & IOCB_NOWAIT);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kiocb_invalidate_pages);
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user