mirror of
https://github.com/torvalds/linux.git
synced 2024-12-26 12:52:30 +00:00
4f024f3797
Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "Ed L. Cashin" <ecashin@coraid.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Yehuda Sadeh <yehuda@inktank.com> Cc: Sage Weil <sage@inktank.com> Cc: Alex Elder <elder@inktank.com> Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris <josh.h.morris@us.ibm.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Neil Brown <neilb@suse.de> Cc: Alasdair Kergon <agk@redhat.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: dm-devel@redhat.com Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux390@de.ibm.com Cc: Boaz Harrosh <bharrosh@panasas.com> Cc: Benny Halevy <bhalevy@tonian.com> Cc: "James E.J. Bottomley" <JBottomley@parallels.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Nicholas A. 
Bellinger" <nab@linux-iscsi.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Chris Mason <chris.mason@fusionio.com> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: Jaegeuk Kim <jaegeuk.kim@samsung.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Dave Kleikamp <shaggy@kernel.org> Cc: Joern Engel <joern@logfs.org> Cc: Prasad Joshi <prasadjoshi.linux@gmail.com> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Ben Myers <bpm@sgi.com> Cc: xfs@oss.sgi.com Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com> Cc: Ben Hutchings <ben@decadent.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Guo Chao <yan@linux.vnet.ibm.com> Cc: Tejun Heo <tj@kernel.org> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Wei Yongjun <yongjun_wei@trendmicro.com.cn> Cc: "Roger Pau Monné" <roger.pau@citrix.com> Cc: Jan Beulich <jbeulich@suse.com> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Cc: Ian Campbell <Ian.Campbell@citrix.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Jiang Liu <jiang.liu@huawei.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Jerome Marchand <jmarchand@redhat.com> Cc: Joe Perches <joe@perches.com> Cc: Peng Tao <tao.peng@emc.com> Cc: Andy Adamson <andros@netapp.com> Cc: fanchaoting <fanchaoting@cn.fujitsu.com> Cc: Jie Liu <jeff.liu@oracle.com> Cc: Sunil Mushran <sunil.mushran@gmail.com> Cc: "Martin K. 
Petersen" <martin.petersen@oracle.com> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Pankaj Kumar <pankaj.km@samsung.com> Cc: Dan Magenheimer <dan.magenheimer@oracle.com> Cc: Mel Gorman <mgorman@suse.de>6
322 lines
7.8 KiB
C
322 lines
7.8 KiB
C
/*
|
|
* fs/logfs/dev_bdev.c - Device access methods for block devices
|
|
*
|
|
* As should be obvious for Linux kernel code, license is GPLv2
|
|
*
|
|
* Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
|
|
*/
|
|
#include "logfs.h"
|
|
#include <linux/bio.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/prefetch.h>
|
|
|
|
/* Low bits of @ofs below PAGE_SIZE, i.e. the byte offset inside a page
 * (the mask form presumes PAGE_SIZE is a power of two). */
#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
|
|
|
|
static int sync_request(struct page *page, struct block_device *bdev, int rw)
|
|
{
|
|
struct bio bio;
|
|
struct bio_vec bio_vec;
|
|
|
|
bio_init(&bio);
|
|
bio.bi_max_vecs = 1;
|
|
bio.bi_io_vec = &bio_vec;
|
|
bio_vec.bv_page = page;
|
|
bio_vec.bv_len = PAGE_SIZE;
|
|
bio_vec.bv_offset = 0;
|
|
bio.bi_vcnt = 1;
|
|
bio.bi_bdev = bdev;
|
|
bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9);
|
|
bio.bi_iter.bi_size = PAGE_SIZE;
|
|
|
|
return submit_bio_wait(rw, &bio);
|
|
}
|
|
|
|
static int bdev_readpage(void *_sb, struct page *page)
|
|
{
|
|
struct super_block *sb = _sb;
|
|
struct block_device *bdev = logfs_super(sb)->s_bdev;
|
|
int err;
|
|
|
|
err = sync_request(page, bdev, READ);
|
|
if (err) {
|
|
ClearPageUptodate(page);
|
|
SetPageError(page);
|
|
} else {
|
|
SetPageUptodate(page);
|
|
ClearPageError(page);
|
|
}
|
|
unlock_page(page);
|
|
return err;
|
|
}
|
|
|
|
/*
 * Single file-wide wait queue: writeseg_end_io() and erase_end_io() wake it
 * when s_pending_writes drops to zero, bdev_sync() sleeps on it.
 */
static DECLARE_WAIT_QUEUE_HEAD(wq);
|
|
|
|
static void writeseg_end_io(struct bio *bio, int err)
|
|
{
|
|
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
|
|
struct bio_vec *bvec;
|
|
int i;
|
|
struct super_block *sb = bio->bi_private;
|
|
struct logfs_super *super = logfs_super(sb);
|
|
|
|
BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
|
|
BUG_ON(err);
|
|
|
|
bio_for_each_segment_all(bvec, bio, i) {
|
|
end_page_writeback(bvec->bv_page);
|
|
page_cache_release(bvec->bv_page);
|
|
}
|
|
bio_put(bio);
|
|
if (atomic_dec_and_test(&super->s_pending_writes))
|
|
wake_up(&wq);
|
|
}
|
|
|
|
static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
|
|
size_t nr_pages)
|
|
{
|
|
struct logfs_super *super = logfs_super(sb);
|
|
struct address_space *mapping = super->s_mapping_inode->i_mapping;
|
|
struct bio *bio;
|
|
struct page *page;
|
|
unsigned int max_pages;
|
|
int i;
|
|
|
|
max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
|
|
|
|
bio = bio_alloc(GFP_NOFS, max_pages);
|
|
BUG_ON(!bio);
|
|
|
|
for (i = 0; i < nr_pages; i++) {
|
|
if (i >= max_pages) {
|
|
/* Block layer cannot split bios :( */
|
|
bio->bi_vcnt = i;
|
|
bio->bi_iter.bi_size = i * PAGE_SIZE;
|
|
bio->bi_bdev = super->s_bdev;
|
|
bio->bi_iter.bi_sector = ofs >> 9;
|
|
bio->bi_private = sb;
|
|
bio->bi_end_io = writeseg_end_io;
|
|
atomic_inc(&super->s_pending_writes);
|
|
submit_bio(WRITE, bio);
|
|
|
|
ofs += i * PAGE_SIZE;
|
|
index += i;
|
|
nr_pages -= i;
|
|
i = 0;
|
|
|
|
bio = bio_alloc(GFP_NOFS, max_pages);
|
|
BUG_ON(!bio);
|
|
}
|
|
page = find_lock_page(mapping, index + i);
|
|
BUG_ON(!page);
|
|
bio->bi_io_vec[i].bv_page = page;
|
|
bio->bi_io_vec[i].bv_len = PAGE_SIZE;
|
|
bio->bi_io_vec[i].bv_offset = 0;
|
|
|
|
BUG_ON(PageWriteback(page));
|
|
set_page_writeback(page);
|
|
unlock_page(page);
|
|
}
|
|
bio->bi_vcnt = nr_pages;
|
|
bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
|
|
bio->bi_bdev = super->s_bdev;
|
|
bio->bi_iter.bi_sector = ofs >> 9;
|
|
bio->bi_private = sb;
|
|
bio->bi_end_io = writeseg_end_io;
|
|
atomic_inc(&super->s_pending_writes);
|
|
submit_bio(WRITE, bio);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Write the byte range [@ofs, @ofs + @len) to the block device.
 *
 * The range is widened to whole pages: the start is rounded down to a page
 * boundary and the length rounded up, then the resulting page run is handed
 * to __bdev_writeseg().  Must not be called on a read-only filesystem (BUG).
 */
static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
{
	struct logfs_super *super = logfs_super(sb);
	int head;

	BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO);

	/*
	 * A zero length can happen when the object fit perfectly into a
	 * segment, the segment gets written per sync and subsequently
	 * closed.
	 */
	if (!len)
		return;

	/* Pull the start back to the beginning of its page. */
	head = ofs & (PAGE_SIZE - 1);
	if (head != 0) {
		len += head;
		ofs -= head;
	}

	len = PAGE_ALIGN(len);
	__bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
}
|
|
|
|
|
|
static void erase_end_io(struct bio *bio, int err)
|
|
{
|
|
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
|
|
struct super_block *sb = bio->bi_private;
|
|
struct logfs_super *super = logfs_super(sb);
|
|
|
|
BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
|
|
BUG_ON(err);
|
|
BUG_ON(bio->bi_vcnt == 0);
|
|
bio_put(bio);
|
|
if (atomic_dec_and_test(&super->s_pending_writes))
|
|
wake_up(&wq);
|
|
}
|
|
|
|
static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
|
|
size_t nr_pages)
|
|
{
|
|
struct logfs_super *super = logfs_super(sb);
|
|
struct bio *bio;
|
|
unsigned int max_pages;
|
|
int i;
|
|
|
|
max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
|
|
|
|
bio = bio_alloc(GFP_NOFS, max_pages);
|
|
BUG_ON(!bio);
|
|
|
|
for (i = 0; i < nr_pages; i++) {
|
|
if (i >= max_pages) {
|
|
/* Block layer cannot split bios :( */
|
|
bio->bi_vcnt = i;
|
|
bio->bi_iter.bi_size = i * PAGE_SIZE;
|
|
bio->bi_bdev = super->s_bdev;
|
|
bio->bi_iter.bi_sector = ofs >> 9;
|
|
bio->bi_private = sb;
|
|
bio->bi_end_io = erase_end_io;
|
|
atomic_inc(&super->s_pending_writes);
|
|
submit_bio(WRITE, bio);
|
|
|
|
ofs += i * PAGE_SIZE;
|
|
index += i;
|
|
nr_pages -= i;
|
|
i = 0;
|
|
|
|
bio = bio_alloc(GFP_NOFS, max_pages);
|
|
BUG_ON(!bio);
|
|
}
|
|
bio->bi_io_vec[i].bv_page = super->s_erase_page;
|
|
bio->bi_io_vec[i].bv_len = PAGE_SIZE;
|
|
bio->bi_io_vec[i].bv_offset = 0;
|
|
}
|
|
bio->bi_vcnt = nr_pages;
|
|
bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
|
|
bio->bi_bdev = super->s_bdev;
|
|
bio->bi_iter.bi_sector = ofs >> 9;
|
|
bio->bi_private = sb;
|
|
bio->bi_end_io = erase_end_io;
|
|
atomic_inc(&super->s_pending_writes);
|
|
submit_bio(WRITE, bio);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Erase @len bytes at device offset @to.
 *
 * Both @to and @len must be page aligned (BUG otherwise).  Returns -EROFS
 * on a read-only filesystem.  The range is only actually overwritten when
 * @ensure_write is set; otherwise this is a no-op.  Returns 0 in both of
 * those cases.
 */
static int bdev_erase(struct super_block *sb, loff_t to, size_t len,
		int ensure_write)
{
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(to & (PAGE_SIZE - 1));
	BUG_ON(len & (PAGE_SIZE - 1));

	if (super->s_flags & LOGFS_SB_FLAG_RO)
		return -EROFS;

	if (!ensure_write)
		return 0;

	/*
	 * Object store doesn't care whether erases happen or not.
	 * But for the journal they are required.  Otherwise a scan
	 * can find an old commit entry and assume it is the current
	 * one, travelling back in time.
	 */
	do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT);
	return 0;
}
|
|
|
|
static void bdev_sync(struct super_block *sb)
|
|
{
|
|
struct logfs_super *super = logfs_super(sb);
|
|
|
|
wait_event(wq, atomic_read(&super->s_pending_writes) == 0);
|
|
}
|
|
|
|
/*
 * Fetch the page holding the first superblock.
 *
 * Stores 0 in *@ofs and returns whatever read_cache_page() yields for page
 * index 0 of the mapping inode, using bdev_readpage() as the filler.
 */
static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs)
{
	struct address_space *mapping;

	mapping = logfs_super(sb)->s_mapping_inode->i_mapping;
	*ofs = 0;

	return read_cache_page(mapping, 0, bdev_readpage, sb);
}
|
|
|
|
/*
 * Fetch the page holding the last superblock.
 *
 * The position is the device size rounded down to a 4 KiB boundary, minus
 * another 4 KiB.  That byte offset is stored in *@ofs, and the page
 * containing it is read through read_cache_page() with bdev_readpage() as
 * the filler.
 *
 * NOTE(review): for a device smaller than 4 KiB the subtraction wraps
 * around; presumably callers never get here with such a device - confirm.
 */
static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	u64 pos;

	pos = super->s_bdev->bd_inode->i_size & ~0xfffULL;
	pos -= 0x1000;
	*ofs = pos;

	return read_cache_page(mapping, pos >> PAGE_SHIFT, bdev_readpage, sb);
}
|
|
|
|
static int bdev_write_sb(struct super_block *sb, struct page *page)
|
|
{
|
|
struct block_device *bdev = logfs_super(sb)->s_bdev;
|
|
|
|
/* Nothing special to do for block devices. */
|
|
return sync_request(page, bdev, WRITE);
|
|
}
|
|
|
|
static void bdev_put_device(struct logfs_super *s)
|
|
{
|
|
blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
|
|
}
|
|
|
|
/*
 * ->can_write_buf hook for block devices: unconditionally returns 0,
 * regardless of @sb and @ofs.
 */
static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
{
	return 0;
}
|
|
|
|
static const struct logfs_device_ops bd_devops = {
|
|
.find_first_sb = bdev_find_first_sb,
|
|
.find_last_sb = bdev_find_last_sb,
|
|
.write_sb = bdev_write_sb,
|
|
.readpage = bdev_readpage,
|
|
.writeseg = bdev_writeseg,
|
|
.erase = bdev_erase,
|
|
.can_write_buf = bdev_can_write_buf,
|
|
.sync = bdev_sync,
|
|
.put_device = bdev_put_device,
|
|
};
|
|
|
|
int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
|
|
const char *devname)
|
|
{
|
|
struct block_device *bdev;
|
|
|
|
bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
|
|
type);
|
|
if (IS_ERR(bdev))
|
|
return PTR_ERR(bdev);
|
|
|
|
if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
|
|
int mtdnr = MINOR(bdev->bd_dev);
|
|
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
|
|
return logfs_get_sb_mtd(p, mtdnr);
|
|
}
|
|
|
|
p->s_bdev = bdev;
|
|
p->s_mtd = NULL;
|
|
p->s_devops = &bd_devops;
|
|
return 0;
|
|
}
|