Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (28 commits)
  cfq-iosched: add close cooperator code
  cfq-iosched: log responsible 'cfqq' in idle timer arm
  cfq-iosched: tweak kick logic a bit more
  cfq-iosched: no need to save interrupts in cfq_kick_queue()
  brd: fix cacheflushing
  brd: support barriers
  swap: Remove code handling bio_alloc failure with __GFP_WAIT
  gfs2: Remove code handling bio_alloc failure with __GFP_WAIT
  ext4: Remove code handling bio_alloc failure with __GFP_WAIT
  dio: Remove code handling bio_alloc failure with __GFP_WAIT
  block: Remove code handling bio_alloc failure with __GFP_WAIT
  bio: add documentation to bio_alloc()
  splice: add helpers for locking pipe inode
  splice: remove generic_file_splice_write_nolock()
  ocfs2: fix i_mutex locking in ocfs2_splice_to_file()
  splice: fix i_mutex locking in generic_splice_write()
  splice: remove i_mutex locking in splice_from_pipe()
  splice: split up __splice_from_pipe()
  block: fix SG_IO to return a proper error value
  cfq-iosched: don't delay queue kick for a merged request
  ...
This commit is contained in:
Linus Torvalds
2009-04-15 09:03:47 -07:00
33 changed files with 826 additions and 534 deletions

View File

@@ -504,6 +504,115 @@ static inline int bio_has_data(struct bio *bio)
return bio && bio->bi_io_vec != NULL;
}
/*
* BIO list managment for use by remapping drivers (e.g. DM or MD).
*
* A bio_list anchors a singly-linked list of bios chained through the bi_next
* member of the bio. The bio_list also caches the last list member to allow
* fast access to the tail.
*/
struct bio_list {
struct bio *head;
struct bio *tail;
};
static inline int bio_list_empty(const struct bio_list *bl)
{
return bl->head == NULL;
}
static inline void bio_list_init(struct bio_list *bl)
{
bl->head = bl->tail = NULL;
}
#define bio_list_for_each(bio, bl) \
for (bio = (bl)->head; bio; bio = bio->bi_next)
static inline unsigned bio_list_size(const struct bio_list *bl)
{
unsigned sz = 0;
struct bio *bio;
bio_list_for_each(bio, bl)
sz++;
return sz;
}
static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = NULL;
if (bl->tail)
bl->tail->bi_next = bio;
else
bl->head = bio;
bl->tail = bio;
}
static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = bl->head;
bl->head = bio;
if (!bl->tail)
bl->tail = bio;
}
static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
if (!bl2->head)
return;
if (bl->tail)
bl->tail->bi_next = bl2->head;
else
bl->head = bl2->head;
bl->tail = bl2->tail;
}
static inline void bio_list_merge_head(struct bio_list *bl,
struct bio_list *bl2)
{
if (!bl2->head)
return;
if (bl->head)
bl2->tail->bi_next = bl->head;
else
bl->tail = bl2->tail;
bl->head = bl2->head;
}
static inline struct bio *bio_list_pop(struct bio_list *bl)
{
struct bio *bio = bl->head;
if (bio) {
bl->head = bl->head->bi_next;
if (!bl->head)
bl->tail = NULL;
bio->bi_next = NULL;
}
return bio;
}
static inline struct bio *bio_list_get(struct bio_list *bl)
{
struct bio *bio = bl->head;
bl->head = bl->tail = NULL;
return bio;
}
#if defined(CONFIG_BLK_DEV_INTEGRITY)
#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))

View File

@@ -87,6 +87,60 @@ struct inodes_stat_t {
*/
#define FMODE_NOCMTIME ((__force fmode_t)2048)
/*
* The below are the various read and write types that we support. Some of
* them include behavioral modifiers that send information down to the
* block layer and IO scheduler. Terminology:
*
* The block layer uses device plugging to defer IO a little bit, in
* the hope that we will see more IO very shortly. This increases
* coalescing of adjacent IO and thus reduces the number of IOs we
* have to send to the device. It also allows for better queuing,
* if the IO isn't mergeable. If the caller is going to be waiting
* for the IO, then he must ensure that the device is unplugged so
* that the IO is dispatched to the driver.
*
* All IO is handled async in Linux. This is fine for background
* writes, but for reads or writes that someone waits for completion
* on, we want to notify the block layer and IO scheduler so that they
* know about it. That allows them to make better scheduling
* decisions. So when the below references 'sync' and 'async', it
* is referencing this priority hint.
*
* With that in mind, the available types are:
*
* READ A normal read operation. Device will be plugged.
* READ_SYNC A synchronous read. Device is not plugged, caller can
* immediately wait on this read without caring about
* unplugging.
* READA Used for read-ahead operations. Lower priority, and the
* block layer could (in theory) choose to ignore this
* request if it runs into resource problems.
* WRITE A normal async write. Device will be plugged.
* SWRITE Like WRITE, but a special case for ll_rw_block() that
* tells it to lock the buffer first. Normally a buffer
* must be locked before doing IO.
* WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down
* the hint that someone will be waiting on this IO
* shortly. The device must still be unplugged explicitly,
* WRITE_SYNC_PLUG does not do this as we could be
* submitting more writes before we actually wait on any
* of them.
* WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device
* immediately after submission. The write equivalent
* of READ_SYNC.
* WRITE_ODIRECT Special case write for O_DIRECT only.
* SWRITE_SYNC
* SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
* See SWRITE.
* WRITE_BARRIER Like WRITE, but tells the block layer that all
* previously submitted writes must be safely on storage
* before this one is started. Also guarantees that when
* this write is complete, it itself is also safely on
* storage. Prevents reordering of writes on both sides
* of this IO.
*
*/
#define RW_MASK 1
#define RWA_MASK 2
#define READ 0
@@ -102,6 +156,11 @@ struct inodes_stat_t {
(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
/*
* These aren't really reads or writes, they pass down information about
* parts of device that are now unused by the file system.
*/
#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
@@ -738,9 +797,6 @@ enum inode_i_mutex_lock_class
I_MUTEX_QUOTA
};
extern void inode_double_lock(struct inode *inode1, struct inode *inode2);
extern void inode_double_unlock(struct inode *inode1, struct inode *inode2);
/*
* NOTE: in a 32bit arch with a preemptable kernel and
* an UP compile the i_size_read/write must be atomic
@@ -2150,8 +2206,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, loff_t *, size_t len, unsigned int flags);
extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,

View File

@@ -134,6 +134,11 @@ struct pipe_buf_operations {
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE
/* Pipe lock and unlock operations */
void pipe_lock(struct pipe_inode_info *);
void pipe_unlock(struct pipe_inode_info *);
void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe);

View File

@@ -36,6 +36,8 @@ struct splice_desc {
void *data; /* cookie */
} u;
loff_t pos; /* file position */
size_t num_spliced; /* number of bytes already spliced */
bool need_wakeup; /* need to wake up writer */
};
struct partial_page {
@@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
splice_actor *);
extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
struct splice_desc *, splice_actor *);
extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
splice_actor *);
extern int splice_from_pipe_next(struct pipe_inode_info *,
struct splice_desc *);
extern void splice_from_pipe_begin(struct splice_desc *);
extern void splice_from_pipe_end(struct pipe_inode_info *,
struct splice_desc *);
extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
struct splice_desc *);
extern ssize_t splice_to_pipe(struct pipe_inode_info *,
struct splice_pipe_desc *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,