mirror of
https://github.com/torvalds/linux.git
synced 2024-11-30 08:01:59 +00:00
5166701b36
Pull vfs updates from Al Viro: "The first vfs pile, with deep apologies for being very late in this window. Assorted cleanups and fixes, plus a large preparatory part of iov_iter work. There's a lot more of that, but it'll probably go into the next merge window - it *does* shape up nicely, removes a lot of boilerplate, gets rid of locking inconsistencies between aio_write and splice_write and I hope to get Kent's direct-io rewrite merged into the same queue, but some of the stuff after this point is having (mostly trivial) conflicts with the things already merged into mainline and with some I want more testing. This one passes LTP and xfstests without regressions, in addition to usual beating. BTW, readahead02 in ltp syscalls testsuite has started giving failures since "mm/readahead.c: fix readahead failure for memoryless NUMA nodes and limit readahead pages" - might be a false positive, might be a real regression..." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (63 commits) missing bits of "splice: fix racy pipe->buffers uses" cifs: fix the race in cifs_writev() ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure kill generic_file_buffered_write() ocfs2_file_aio_write(): switch to generic_perform_write() ceph_aio_write(): switch to generic_perform_write() xfs_file_buffered_aio_write(): switch to generic_perform_write() export generic_perform_write(), start getting rid of generic_file_buffer_write() generic_file_direct_write(): get rid of ppos argument btrfs_file_aio_write(): get rid of ppos kill the 5th argument of generic_file_buffered_write() kill the 4th argument of __generic_file_aio_write() lustre: don't open-code kernel_recvmsg() ocfs2: don't open-code kernel_recvmsg() drbd: don't open-code kernel_recvmsg() constify blk_rq_map_user_iov() and friends lustre: switch to kernel_sendmsg() ocfs2: don't open-code kernel_sendmsg() take iov_iter stuff to mm/iov_iter.c process_vm_access: tidy up a bit ...
332 lines
8.3 KiB
C
332 lines
8.3 KiB
C
/*
|
|
* Functions related to mapping data to requests
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/blkdev.h>
|
|
#include <scsi/sg.h> /* for struct sg_iovec */
|
|
|
|
#include "blk.h"
|
|
|
|
int blk_rq_append_bio(struct request_queue *q, struct request *rq,
|
|
struct bio *bio)
|
|
{
|
|
if (!rq->bio)
|
|
blk_rq_bio_prep(q, rq, bio);
|
|
else if (!ll_back_merge_fn(q, rq, bio))
|
|
return -EINVAL;
|
|
else {
|
|
rq->biotail->bi_next = bio;
|
|
rq->biotail = bio;
|
|
|
|
rq->__data_len += bio->bi_iter.bi_size;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int __blk_rq_unmap_user(struct bio *bio)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (bio) {
|
|
if (bio_flagged(bio, BIO_USER_MAPPED))
|
|
bio_unmap_user(bio);
|
|
else
|
|
ret = bio_uncopy_user(bio);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Map one chunk of user memory (@ubuf, @len) into a bio and append that
 * bio to @rq.
 *
 * Returns the number of bytes carried by the appended bio on success, or
 * a negative errno if building the bio or appending it to @rq failed.
 */
static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
			     struct rq_map_data *map_data, void __user *ubuf,
			     unsigned int len, gfp_t gfp_mask)
{
	const int is_read = rq_data_dir(rq) == READ;
	unsigned long addr = (unsigned long) ubuf;
	struct bio *user_bio;
	struct bio *bio;
	int err;

	/*
	 * Map the user pages in for direct dma only when the buffer meets
	 * the alignment requirement and the caller supplied no map_data;
	 * in every other case set up kernel bounce buffers.
	 */
	if (!blk_rq_aligned(q, addr, len) || map_data)
		bio = bio_copy_user(q, map_data, addr, len, is_read, gfp_mask);
	else
		bio = bio_map_user(q, NULL, addr, len, is_read, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	if (map_data && map_data->null_mapped)
		bio->bi_flags |= (1 << BIO_NULL_MAPPED);

	/* blk_queue_bounce() may replace 'bio'; remember the one we built. */
	user_bio = bio;
	blk_queue_bounce(q, &bio);

	/*
	 * The (possibly bounced) bio gets linked into the request and may
	 * have to be walked later, so hold a reference to keep it from
	 * being freed.
	 */
	bio_get(bio);

	err = blk_rq_append_bio(q, rq, bio);
	if (err) {
		/* if it was bounced we must call the end io function */
		bio_endio(bio, 0);
		__blk_rq_unmap_user(user_bio);
		bio_put(bio);
		return err;
	}

	return bio->bi_iter.bi_size;
}
|
|
|
|
/**
|
|
* blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
|
|
* @q: request queue where request should be inserted
|
|
* @rq: request structure to fill
|
|
* @map_data: pointer to the rq_map_data holding pages (if necessary)
|
|
* @ubuf: the user buffer
|
|
* @len: length of user data
|
|
* @gfp_mask: memory allocation flags
|
|
*
|
|
* Description:
|
|
* Data will be mapped directly for zero copy I/O, if possible. Otherwise
|
|
* a kernel bounce buffer is used.
|
|
*
|
|
* A matching blk_rq_unmap_user() must be issued at the end of I/O, while
|
|
* still in process context.
|
|
*
|
|
* Note: The mapped bio may need to be bounced through blk_queue_bounce()
|
|
* before being submitted to the device, as pages mapped may be out of
|
|
* reach. It's the callers responsibility to make sure this happens. The
|
|
* original bio must be passed back in to blk_rq_unmap_user() for proper
|
|
* unmapping.
|
|
*/
|
|
int blk_rq_map_user(struct request_queue *q, struct request *rq,
|
|
struct rq_map_data *map_data, void __user *ubuf,
|
|
unsigned long len, gfp_t gfp_mask)
|
|
{
|
|
unsigned long bytes_read = 0;
|
|
struct bio *bio = NULL;
|
|
int ret;
|
|
|
|
if (len > (queue_max_hw_sectors(q) << 9))
|
|
return -EINVAL;
|
|
if (!len)
|
|
return -EINVAL;
|
|
|
|
if (!ubuf && (!map_data || !map_data->null_mapped))
|
|
return -EINVAL;
|
|
|
|
while (bytes_read != len) {
|
|
unsigned long map_len, end, start;
|
|
|
|
map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
|
|
end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
|
|
>> PAGE_SHIFT;
|
|
start = (unsigned long)ubuf >> PAGE_SHIFT;
|
|
|
|
/*
|
|
* A bad offset could cause us to require BIO_MAX_PAGES + 1
|
|
* pages. If this happens we just lower the requested
|
|
* mapping len by a page so that we can fit
|
|
*/
|
|
if (end - start > BIO_MAX_PAGES)
|
|
map_len -= PAGE_SIZE;
|
|
|
|
ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
|
|
gfp_mask);
|
|
if (ret < 0)
|
|
goto unmap_rq;
|
|
if (!bio)
|
|
bio = rq->bio;
|
|
bytes_read += ret;
|
|
ubuf += ret;
|
|
|
|
if (map_data)
|
|
map_data->offset += ret;
|
|
}
|
|
|
|
if (!bio_flagged(bio, BIO_USER_MAPPED))
|
|
rq->cmd_flags |= REQ_COPY_USER;
|
|
|
|
rq->buffer = NULL;
|
|
return 0;
|
|
unmap_rq:
|
|
blk_rq_unmap_user(bio);
|
|
rq->bio = NULL;
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(blk_rq_map_user);
|
|
|
|
/**
|
|
* blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
|
|
* @q: request queue where request should be inserted
|
|
* @rq: request to map data to
|
|
* @map_data: pointer to the rq_map_data holding pages (if necessary)
|
|
* @iov: pointer to the iovec
|
|
* @iov_count: number of elements in the iovec
|
|
* @len: I/O byte count
|
|
* @gfp_mask: memory allocation flags
|
|
*
|
|
* Description:
|
|
* Data will be mapped directly for zero copy I/O, if possible. Otherwise
|
|
* a kernel bounce buffer is used.
|
|
*
|
|
* A matching blk_rq_unmap_user() must be issued at the end of I/O, while
|
|
* still in process context.
|
|
*
|
|
* Note: The mapped bio may need to be bounced through blk_queue_bounce()
|
|
* before being submitted to the device, as pages mapped may be out of
|
|
* reach. It's the callers responsibility to make sure this happens. The
|
|
* original bio must be passed back in to blk_rq_unmap_user() for proper
|
|
* unmapping.
|
|
*/
|
|
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
|
|
struct rq_map_data *map_data, const struct sg_iovec *iov,
|
|
int iov_count, unsigned int len, gfp_t gfp_mask)
|
|
{
|
|
struct bio *bio;
|
|
int i, read = rq_data_dir(rq) == READ;
|
|
int unaligned = 0;
|
|
|
|
if (!iov || iov_count <= 0)
|
|
return -EINVAL;
|
|
|
|
for (i = 0; i < iov_count; i++) {
|
|
unsigned long uaddr = (unsigned long)iov[i].iov_base;
|
|
|
|
if (!iov[i].iov_len)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Keep going so we check length of all segments
|
|
*/
|
|
if (uaddr & queue_dma_alignment(q))
|
|
unaligned = 1;
|
|
}
|
|
|
|
if (unaligned || (q->dma_pad_mask & len) || map_data)
|
|
bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
|
|
gfp_mask);
|
|
else
|
|
bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
|
|
|
|
if (IS_ERR(bio))
|
|
return PTR_ERR(bio);
|
|
|
|
if (bio->bi_iter.bi_size != len) {
|
|
/*
|
|
* Grab an extra reference to this bio, as bio_unmap_user()
|
|
* expects to be able to drop it twice as it happens on the
|
|
* normal IO completion path
|
|
*/
|
|
bio_get(bio);
|
|
bio_endio(bio, 0);
|
|
__blk_rq_unmap_user(bio);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!bio_flagged(bio, BIO_USER_MAPPED))
|
|
rq->cmd_flags |= REQ_COPY_USER;
|
|
|
|
blk_queue_bounce(q, &bio);
|
|
bio_get(bio);
|
|
blk_rq_bio_prep(q, rq, bio);
|
|
rq->buffer = NULL;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(blk_rq_map_user_iov);
|
|
|
|
/**
|
|
* blk_rq_unmap_user - unmap a request with user data
|
|
* @bio: start of bio list
|
|
*
|
|
* Description:
|
|
* Unmap a rq previously mapped by blk_rq_map_user(). The caller must
|
|
* supply the original rq->bio from the blk_rq_map_user() return, since
|
|
* the I/O completion may have changed rq->bio.
|
|
*/
|
|
int blk_rq_unmap_user(struct bio *bio)
|
|
{
|
|
struct bio *mapped_bio;
|
|
int ret = 0, ret2;
|
|
|
|
while (bio) {
|
|
mapped_bio = bio;
|
|
if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
|
|
mapped_bio = bio->bi_private;
|
|
|
|
ret2 = __blk_rq_unmap_user(mapped_bio);
|
|
if (ret2 && !ret)
|
|
ret = ret2;
|
|
|
|
mapped_bio = bio;
|
|
bio = bio->bi_next;
|
|
bio_put(mapped_bio);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(blk_rq_unmap_user);
|
|
|
|
/**
|
|
* blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
|
|
* @q: request queue where request should be inserted
|
|
* @rq: request to fill
|
|
* @kbuf: the kernel buffer
|
|
* @len: length of user data
|
|
* @gfp_mask: memory allocation flags
|
|
*
|
|
* Description:
|
|
* Data will be mapped directly if possible. Otherwise a bounce
|
|
* buffer is used. Can be called multiple times to append multiple
|
|
* buffers.
|
|
*/
|
|
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
|
unsigned int len, gfp_t gfp_mask)
|
|
{
|
|
int reading = rq_data_dir(rq) == READ;
|
|
unsigned long addr = (unsigned long) kbuf;
|
|
int do_copy = 0;
|
|
struct bio *bio;
|
|
int ret;
|
|
|
|
if (len > (queue_max_hw_sectors(q) << 9))
|
|
return -EINVAL;
|
|
if (!len || !kbuf)
|
|
return -EINVAL;
|
|
|
|
do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
|
|
if (do_copy)
|
|
bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
|
|
else
|
|
bio = bio_map_kern(q, kbuf, len, gfp_mask);
|
|
|
|
if (IS_ERR(bio))
|
|
return PTR_ERR(bio);
|
|
|
|
if (!reading)
|
|
bio->bi_rw |= REQ_WRITE;
|
|
|
|
if (do_copy)
|
|
rq->cmd_flags |= REQ_COPY_USER;
|
|
|
|
ret = blk_rq_append_bio(q, rq, bio);
|
|
if (unlikely(ret)) {
|
|
/* request is too big */
|
|
bio_put(bio);
|
|
return ret;
|
|
}
|
|
|
|
blk_queue_bounce(q, &rq->bio);
|
|
rq->buffer = NULL;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(blk_rq_map_kern);
|