fuse update for 6.13

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQSQHSd0lITzzeNWNm3h3BK/laaZPAUCZ0Rb/wAKCRDh3BK/laaZ
 PK80AQDAUgA6S5SSrbJxwRFNOhbwtZxZqJ8fomJR5xuWIEQ9pwEAkpFqhBhBW0y1
 0YaREow2aDANQQtSUrfPtgva1ZXFwQU=
 =Cyx5
 -----END PGP SIGNATURE-----

Merge tag 'fuse-update-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Add page -> folio conversions (Joanne Koong, Josef Bacik)

 - Allow max size of fuse requests to be configurable with a sysctl
   (Joanne Koong)

 - Allow FOPEN_DIRECT_IO to take advantage of the async code path (yangyun)

 - Fix large kernel reads (like a module load) in virtio_fs (Hou Tao)

 - Fix attribute inconsistency in cases where readdirplus (and, in corner
   cases, plain lookup) races with inode eviction (Zhang Tianci)

 - Fix a WARN_ON triggered by virtio_fs (Asahi Lina)

* tag 'fuse-update-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (30 commits)
  virtiofs: dax: remove ->writepages() callback
  fuse: check attributes staleness on fuse_iget()
  fuse: remove pages for requests and exclusively use folios
  fuse: convert direct io to use folios
  mm/writeback: add folio_mark_dirty_lock()
  fuse: convert writebacks to use folios
  fuse: convert retrieves to use folios
  fuse: convert ioctls to use folios
  fuse: convert writes (non-writeback) to use folios
  fuse: convert reads to use folios
  fuse: convert readdir to use folios
  fuse: convert readlink to use folios
  fuse: convert cuse to use folios
  fuse: add support in virtio for requests using folios
  fuse: support folios in struct fuse_args_pages and fuse_copy_pages()
  fuse: convert fuse_notify_store to use folios
  fuse: convert fuse_retrieve to use folios
  fuse: use the folio based vmstat helpers
  fuse: convert fuse_writepage_need_send to take a folio
  fuse: convert fuse_do_readpage to use folios
  ...
This commit is contained in:
Linus Torvalds 2024-11-26 12:41:27 -08:00
commit fb527fc1f3
16 changed files with 579 additions and 375 deletions

View File

@ -337,3 +337,13 @@ Each "watch" costs roughly 90 bytes on a 32-bit kernel, and roughly 160 bytes
on a 64-bit one.
The current default value for ``max_user_watches`` is 4% of the
available low memory, divided by the "watch" cost in bytes.
5. /proc/sys/fs/fuse - Configuration options for FUSE filesystems
=====================================================================
This directory contains the following configuration options for FUSE
filesystems:
``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for
setting/getting the maximum number of pages that can be used for servicing
requests in FUSE.

View File

@ -14,5 +14,6 @@ fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
fuse-$(CONFIG_SYSCTL) += sysctl.o
virtiofs-y := virtio_fs.o

View File

@ -303,8 +303,8 @@ struct cuse_init_args {
struct fuse_args_pages ap;
struct cuse_init_in in;
struct cuse_init_out out;
struct page *page;
struct fuse_page_desc desc;
struct folio *folio;
struct fuse_folio_desc desc;
};
/**
@ -326,7 +326,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
struct fuse_args_pages *ap = &ia->ap;
struct cuse_conn *cc = fc_to_cc(fc), *pos;
struct cuse_init_out *arg = &ia->out;
struct page *page = ap->pages[0];
struct folio *folio = ap->folios[0];
struct cuse_devinfo devinfo = { };
struct device *dev;
struct cdev *cdev;
@ -343,7 +343,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
/* parse init reply */
cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size,
rc = cuse_parse_devinfo(folio_address(folio), ap->args.out_args[1].size,
&devinfo);
if (rc)
goto err;
@ -411,7 +411,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
kobject_uevent(&dev->kobj, KOBJ_ADD);
out:
kfree(ia);
__free_page(page);
folio_put(folio);
return;
err_cdev:
@ -429,7 +429,7 @@ err:
static int cuse_send_init(struct cuse_conn *cc)
{
int rc;
struct page *page;
struct folio *folio;
struct fuse_mount *fm = &cc->fm;
struct cuse_init_args *ia;
struct fuse_args_pages *ap;
@ -437,13 +437,14 @@ static int cuse_send_init(struct cuse_conn *cc)
BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
rc = -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 0);
if (!folio)
goto err;
ia = kzalloc(sizeof(*ia), GFP_KERNEL);
if (!ia)
goto err_free_page;
goto err_free_folio;
ap = &ia->ap;
ia->in.major = FUSE_KERNEL_VERSION;
@ -459,18 +460,18 @@ static int cuse_send_init(struct cuse_conn *cc)
ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
ap->args.out_argvar = true;
ap->args.out_pages = true;
ap->num_pages = 1;
ap->pages = &ia->page;
ap->num_folios = 1;
ap->folios = &ia->folio;
ap->descs = &ia->desc;
ia->page = page;
ia->folio = folio;
ia->desc.length = ap->args.out_args[1].size;
ap->args.end = cuse_process_init_reply;
rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
if (rc) {
kfree(ia);
err_free_page:
__free_page(page);
err_free_folio:
folio_put(folio);
}
err:
return rc;

View File

@ -774,16 +774,6 @@ out:
return ret;
}
static int fuse_dax_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
}
static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order,
bool write)
{
@ -1323,7 +1313,6 @@ bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
}
static const struct address_space_operations fuse_dax_file_aops = {
.writepages = fuse_dax_writepages,
.direct_IO = noop_direct_IO,
.dirty_folio = noop_dirty_folio,
};

View File

@ -1028,17 +1028,27 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
struct fuse_req *req = cs->req;
struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
for (i = 0; i < ap->num_folios && (nbytes || zeroing); i++) {
int err;
unsigned int offset = ap->descs[i].offset;
unsigned int count = min(nbytes, ap->descs[i].length);
struct page *orig, *pagep;
err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
orig = pagep = &ap->folios[i]->page;
err = fuse_copy_page(cs, &pagep, offset, count, zeroing);
if (err)
return err;
nbytes -= count;
/*
* fuse_copy_page may have moved a page from a pipe instead of
* copying into our given page, so update the folios if it was
* replaced.
*/
if (pagep != orig)
ap->folios[i] = page_folio(pagep);
}
return 0;
}
@ -1654,24 +1664,25 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
num = outarg.size;
while (num) {
struct folio *folio;
struct page *page;
unsigned int this_num;
err = -ENOMEM;
page = find_or_create_page(mapping, index,
mapping_gfp_mask(mapping));
if (!page)
folio = filemap_grab_folio(mapping, index);
err = PTR_ERR(folio);
if (IS_ERR(folio))
goto out_iput;
this_num = min_t(unsigned, num, PAGE_SIZE - offset);
page = &folio->page;
this_num = min_t(unsigned, num, folio_size(folio) - offset);
err = fuse_copy_page(cs, &page, offset, this_num, 0);
if (!PageUptodate(page) && !err && offset == 0 &&
(this_num == PAGE_SIZE || file_size == end)) {
zero_user_segment(page, this_num, PAGE_SIZE);
SetPageUptodate(page);
if (!folio_test_uptodate(folio) && !err && offset == 0 &&
(this_num == folio_size(folio) || file_size == end)) {
folio_zero_segment(folio, this_num, folio_size(folio));
folio_mark_uptodate(folio);
}
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
if (err)
goto out_iput;
@ -1703,7 +1714,7 @@ static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_retrieve_args *ra =
container_of(args, typeof(*ra), ap.args);
release_pages(ra->ap.pages, ra->ap.num_pages);
release_pages(ra->ap.folios, ra->ap.num_folios);
kfree(ra);
}
@ -1717,7 +1728,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
unsigned int num_pages;
unsigned int num_pages, cur_pages = 0;
struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
@ -1736,15 +1747,15 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, fc->max_pages);
args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
ra = kzalloc(args_size, GFP_KERNEL);
if (!ra)
return -ENOMEM;
ap = &ra->ap;
ap->pages = (void *) (ra + 1);
ap->descs = (void *) (ap->pages + num_pages);
ap->folios = (void *) (ra + 1);
ap->descs = (void *) (ap->folios + num_pages);
args = &ap->args;
args->nodeid = outarg->nodeid;
@ -1755,19 +1766,20 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
index = outarg->offset >> PAGE_SHIFT;
while (num && ap->num_pages < num_pages) {
struct page *page;
while (num && cur_pages < num_pages) {
struct folio *folio;
unsigned int this_num;
page = find_get_page(mapping, index);
if (!page)
folio = filemap_get_folio(mapping, index);
if (IS_ERR(folio))
break;
this_num = min_t(unsigned, num, PAGE_SIZE - offset);
ap->pages[ap->num_pages] = page;
ap->descs[ap->num_pages].offset = offset;
ap->descs[ap->num_pages].length = this_num;
ap->num_pages++;
ap->folios[ap->num_folios] = folio;
ap->descs[ap->num_folios].offset = offset;
ap->descs[ap->num_folios].length = this_num;
ap->num_folios++;
cur_pages++;
offset = 0;
num -= this_num;

View File

@ -366,7 +366,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
u64 attr_version, evict_ctr;
int err;
*inode = NULL;
@ -381,6 +381,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
goto out;
attr_version = fuse_get_attr_version(fm->fc);
evict_ctr = fuse_get_evict_ctr(fm->fc);
fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
err = fuse_simple_request(fm, &args);
@ -398,7 +399,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
&outarg->attr, ATTR_TIMEOUT(outarg),
attr_version);
attr_version, evict_ctr);
err = -ENOMEM;
if (!*inode) {
fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
@ -691,7 +692,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
ff->nodeid = outentry.nodeid;
ff->open_flags = outopenp->open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, ATTR_TIMEOUT(&outentry), 0);
&outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(NULL, ff, flags);
@ -822,7 +823,7 @@ static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
goto out_put_forget_req;
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr, ATTR_TIMEOUT(&outarg), 0);
&outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
if (!inode) {
fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
return -ENOMEM;
@ -1585,13 +1586,13 @@ static int fuse_permission(struct mnt_idmap *idmap,
return err;
}
static int fuse_readlink_page(struct inode *inode, struct page *page)
static int fuse_readlink_page(struct inode *inode, struct folio *folio)
{
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
struct fuse_args_pages ap = {
.num_pages = 1,
.pages = &page,
.num_folios = 1,
.folios = &folio,
.descs = &desc,
};
char *link;
@ -1614,7 +1615,7 @@ static int fuse_readlink_page(struct inode *inode, struct page *page)
if (WARN_ON(res >= PAGE_SIZE))
return -EIO;
link = page_address(page);
link = folio_address(folio);
link[res] = '\0';
return 0;
@ -1624,7 +1625,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct page *page;
struct folio *folio;
int err;
err = -EIO;
@ -1638,20 +1639,20 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
if (!dentry)
goto out_err;
page = alloc_page(GFP_KERNEL);
folio = folio_alloc(GFP_KERNEL, 0);
err = -ENOMEM;
if (!page)
if (!folio)
goto out_err;
err = fuse_readlink_page(inode, page);
err = fuse_readlink_page(inode, folio);
if (err) {
__free_page(page);
folio_put(folio);
goto out_err;
}
set_delayed_call(callback, page_put_link, page);
set_delayed_call(callback, page_put_link, &folio->page);
return page_address(page);
return folio_address(folio);
out_err:
return ERR_PTR(err);
@ -2028,7 +2029,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
fuse_change_attributes_common(inode, &outarg.attr, NULL,
ATTR_TIMEOUT(&outarg),
fuse_get_cache_mask(inode));
fuse_get_cache_mask(inode), 0);
oldsize = inode->i_size;
/* see the comment in fuse_change_attributes() */
if (!is_wb || is_truncate)
@ -2231,7 +2232,7 @@ void fuse_init_dir(struct inode *inode)
static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
{
int err = fuse_readlink_page(folio->mapping->host, &folio->page);
int err = fuse_readlink_page(folio->mapping->host, folio);
if (!err)
folio_mark_uptodate(folio);

View File

@ -436,7 +436,7 @@ static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
WARN_ON(get_fuse_inode(wpa->inode) != fi);
curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
if (idx_from >= curr_index + wpa->ia.ap.num_pages)
if (idx_from >= curr_index + wpa->ia.ap.num_folios)
n = n->rb_right;
else if (idx_to < curr_index)
n = n->rb_left;
@ -483,6 +483,21 @@ static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
}
static inline bool fuse_folio_is_writeback(struct inode *inode,
struct folio *folio)
{
pgoff_t last = folio_next_index(folio) - 1;
return fuse_range_is_writeback(inode, folio_index(folio), last);
}
static void fuse_wait_on_folio_writeback(struct inode *inode,
struct folio *folio)
{
struct fuse_inode *fi = get_fuse_inode(inode);
wait_event(fi->page_waitq, !fuse_folio_is_writeback(inode, folio));
}
/*
* Wait for all pending writepages on the inode to finish.
*
@ -645,17 +660,20 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
args->out_args[0].size = count;
}
static void fuse_release_user_pages(struct fuse_args_pages *ap,
static void fuse_release_user_pages(struct fuse_args_pages *ap, ssize_t nres,
bool should_dirty)
{
unsigned int i;
for (i = 0; i < ap->num_pages; i++) {
for (i = 0; i < ap->num_folios; i++) {
if (should_dirty)
set_page_dirty_lock(ap->pages[i]);
folio_mark_dirty_lock(ap->folios[i]);
if (ap->args.is_pinned)
unpin_user_page(ap->pages[i]);
unpin_folio(ap->folios[i]);
}
if (nres > 0 && ap->args.invalidate_vmap)
invalidate_kernel_vmap_range(ap->args.vmap_base, nres);
}
static void fuse_io_release(struct kref *kref)
@ -725,16 +743,16 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
}
static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
unsigned int npages)
unsigned int nfolios)
{
struct fuse_io_args *ia;
ia = kzalloc(sizeof(*ia), GFP_KERNEL);
if (ia) {
ia->io = io;
ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
&ia->ap.descs);
if (!ia->ap.pages) {
ia->ap.folios = fuse_folios_alloc(nfolios, GFP_KERNEL,
&ia->ap.descs);
if (!ia->ap.folios) {
kfree(ia);
ia = NULL;
}
@ -744,7 +762,7 @@ static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
static void fuse_io_free(struct fuse_io_args *ia)
{
kfree(ia->ap.pages);
kfree(ia->ap.folios);
kfree(ia);
}
@ -754,25 +772,29 @@ static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_io_priv *io = ia->io;
ssize_t pos = -1;
fuse_release_user_pages(&ia->ap, io->should_dirty);
size_t nres;
if (err) {
/* Nothing */
} else if (io->write) {
if (ia->write.out.size > ia->write.in.size) {
err = -EIO;
} else if (ia->write.in.size != ia->write.out.size) {
pos = ia->write.in.offset - io->offset +
ia->write.out.size;
} else {
nres = ia->write.out.size;
if (ia->write.in.size != ia->write.out.size)
pos = ia->write.in.offset - io->offset +
ia->write.out.size;
}
} else {
u32 outsize = args->out_args[0].size;
nres = outsize;
if (ia->read.in.size != outsize)
pos = ia->read.in.offset - io->offset + outsize;
}
fuse_release_user_pages(&ia->ap, err ?: nres, io->should_dirty);
fuse_aio_complete(io, err, pos);
fuse_io_free(ia);
}
@ -843,33 +865,33 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
* reached the client fs yet. So the hole is not present there.
*/
if (!fc->writeback_cache) {
loff_t pos = page_offset(ap->pages[0]) + num_read;
loff_t pos = folio_pos(ap->folios[0]) + num_read;
fuse_read_update_size(inode, pos, attr_ver);
}
}
static int fuse_do_readpage(struct file *file, struct page *page)
static int fuse_do_readfolio(struct file *file, struct folio *folio)
{
struct inode *inode = page->mapping->host;
struct inode *inode = folio->mapping->host;
struct fuse_mount *fm = get_fuse_mount(inode);
loff_t pos = page_offset(page);
struct fuse_page_desc desc = { .length = PAGE_SIZE };
loff_t pos = folio_pos(folio);
struct fuse_folio_desc desc = { .length = PAGE_SIZE };
struct fuse_io_args ia = {
.ap.args.page_zeroing = true,
.ap.args.out_pages = true,
.ap.num_pages = 1,
.ap.pages = &page,
.ap.num_folios = 1,
.ap.folios = &folio,
.ap.descs = &desc,
};
ssize_t res;
u64 attr_ver;
/*
* Page writeback can extend beyond the lifetime of the
* page-cache page, so make sure we read a properly synced
* page.
* With the temporary pages that are used to complete writeback, we can
* have writeback that extends beyond the lifetime of the folio. So
* make sure we read a properly synced folio.
*/
fuse_wait_on_page_writeback(inode, page->index);
fuse_wait_on_folio_writeback(inode, folio);
attr_ver = fuse_get_attr_version(fm->fc);
@ -887,25 +909,24 @@ static int fuse_do_readpage(struct file *file, struct page *page)
if (res < desc.length)
fuse_short_read(inode, attr_ver, res, &ia.ap);
SetPageUptodate(page);
folio_mark_uptodate(folio);
return 0;
}
static int fuse_read_folio(struct file *file, struct folio *folio)
{
struct page *page = &folio->page;
struct inode *inode = page->mapping->host;
struct inode *inode = folio->mapping->host;
int err;
err = -EIO;
if (fuse_is_bad(inode))
goto out;
err = fuse_do_readpage(file, page);
err = fuse_do_readfolio(file, folio);
fuse_invalidate_atime(inode);
out:
unlock_page(page);
folio_unlock(folio);
return err;
}
@ -919,8 +940,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
size_t num_read = args->out_args[0].size;
struct address_space *mapping = NULL;
for (i = 0; mapping == NULL && i < ap->num_pages; i++)
mapping = ap->pages[i]->mapping;
for (i = 0; mapping == NULL && i < ap->num_folios; i++)
mapping = ap->folios[i]->mapping;
if (mapping) {
struct inode *inode = mapping->host;
@ -934,12 +955,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
fuse_invalidate_atime(inode);
}
for (i = 0; i < ap->num_pages; i++) {
struct folio *folio = page_folio(ap->pages[i]);
folio_end_read(folio, !err);
folio_put(folio);
}
for (i = 0; i < ap->num_folios; i++)
folio_end_read(ap->folios[i], !err);
if (ia->ff)
fuse_file_put(ia->ff, false);
@ -951,8 +968,9 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
struct fuse_args_pages *ap = &ia->ap;
loff_t pos = page_offset(ap->pages[0]);
size_t count = ap->num_pages << PAGE_SHIFT;
loff_t pos = folio_pos(ap->folios[0]);
/* Currently, all folios in FUSE are one page */
size_t count = ap->num_folios << PAGE_SHIFT;
ssize_t res;
int err;
@ -963,7 +981,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
/* Don't overflow end offset */
if (pos + (count - 1) == LLONG_MAX) {
count--;
ap->descs[ap->num_pages - 1].length--;
ap->descs[ap->num_folios - 1].length--;
}
WARN_ON((loff_t) (pos + count) < 0);
@ -985,18 +1003,36 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
static void fuse_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->mapping->host;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
unsigned int i, max_pages, nr_pages = 0;
unsigned int max_pages, nr_pages;
pgoff_t first = readahead_index(rac);
pgoff_t last = first + readahead_count(rac) - 1;
if (fuse_is_bad(inode))
return;
wait_event(fi->page_waitq, !fuse_range_is_writeback(inode, first, last));
max_pages = min_t(unsigned int, fc->max_pages,
fc->max_read / PAGE_SIZE);
for (;;) {
/*
* This is only accurate the first time through, since readahead_folio()
* doesn't update readahead_count() from the previous folio until the
* next call. Grab nr_pages here so we know how many pages we're going
* to have to process. This means that we will exit here with
* readahead_count() == folio_nr_pages(last_folio), but we will have
* consumed all of the folios, and read_pages() will call
* readahead_folio() again which will clean up the rac.
*/
nr_pages = readahead_count(rac);
while (nr_pages) {
struct fuse_io_args *ia;
struct fuse_args_pages *ap;
struct folio *folio;
unsigned cur_pages = min(max_pages, nr_pages);
if (fc->num_background >= fc->congestion_threshold &&
rac->ra->async_size >= readahead_count(rac))
@ -1006,23 +1042,19 @@ static void fuse_readahead(struct readahead_control *rac)
*/
break;
nr_pages = readahead_count(rac) - nr_pages;
if (nr_pages > max_pages)
nr_pages = max_pages;
if (nr_pages == 0)
break;
ia = fuse_io_alloc(NULL, nr_pages);
ia = fuse_io_alloc(NULL, cur_pages);
if (!ia)
return;
ap = &ia->ap;
nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
for (i = 0; i < nr_pages; i++) {
fuse_wait_on_page_writeback(inode,
readahead_index(rac) + i);
ap->descs[i].length = PAGE_SIZE;
while (ap->num_folios < cur_pages) {
folio = readahead_folio(rac);
ap->folios[ap->num_folios] = folio;
ap->descs[ap->num_folios].length = folio_size(folio);
ap->num_folios++;
}
ap->num_pages = nr_pages;
fuse_send_readpages(ia, rac->file);
nr_pages -= cur_pages;
}
}
@ -1139,8 +1171,8 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
bool short_write;
int err;
for (i = 0; i < ap->num_pages; i++)
fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
for (i = 0; i < ap->num_folios; i++)
fuse_wait_on_folio_writeback(inode, ap->folios[i]);
fuse_write_args_fill(ia, ff, pos, count);
ia->write.in.flags = fuse_write_flags(iocb);
@ -1154,24 +1186,24 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
short_write = ia->write.out.size < count;
offset = ap->descs[0].offset;
count = ia->write.out.size;
for (i = 0; i < ap->num_pages; i++) {
struct page *page = ap->pages[i];
for (i = 0; i < ap->num_folios; i++) {
struct folio *folio = ap->folios[i];
if (err) {
ClearPageUptodate(page);
folio_clear_uptodate(folio);
} else {
if (count >= PAGE_SIZE - offset)
count -= PAGE_SIZE - offset;
if (count >= folio_size(folio) - offset)
count -= folio_size(folio) - offset;
else {
if (short_write)
ClearPageUptodate(page);
folio_clear_uptodate(folio);
count = 0;
}
offset = 0;
}
if (ia->write.page_locked && (i == ap->num_pages - 1))
unlock_page(page);
put_page(page);
if (ia->write.folio_locked && (i == ap->num_folios - 1))
folio_unlock(folio);
folio_put(folio);
}
return err;
@ -1185,6 +1217,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
unsigned int nr_pages = 0;
size_t count = 0;
int err;
@ -1193,7 +1226,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
do {
size_t tmp;
struct page *page;
struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;
size_t bytes = min_t(size_t, PAGE_SIZE - offset,
iov_iter_count(ii));
@ -1205,27 +1238,30 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
if (fault_in_iov_iter_readable(ii, bytes))
break;
err = -ENOMEM;
page = grab_cache_page_write_begin(mapping, index);
if (!page)
folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
if (IS_ERR(folio)) {
err = PTR_ERR(folio);
break;
}
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
flush_dcache_folio(folio);
tmp = copy_page_from_iter_atomic(page, offset, bytes, ii);
flush_dcache_page(page);
tmp = copy_folio_from_iter_atomic(folio, offset, bytes, ii);
flush_dcache_folio(folio);
if (!tmp) {
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
goto again;
}
err = 0;
ap->pages[ap->num_pages] = page;
ap->descs[ap->num_pages].length = tmp;
ap->num_pages++;
ap->folios[ap->num_folios] = folio;
ap->descs[ap->num_folios].length = tmp;
ap->num_folios++;
nr_pages++;
count += tmp;
pos += tmp;
@ -1235,18 +1271,18 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
/* If we copied full page, mark it uptodate */
if (tmp == PAGE_SIZE)
SetPageUptodate(page);
folio_mark_uptodate(folio);
if (PageUptodate(page)) {
unlock_page(page);
if (folio_test_uptodate(folio)) {
folio_unlock(folio);
} else {
ia->write.page_locked = true;
ia->write.folio_locked = true;
break;
}
if (!fc->big_writes)
break;
} while (iov_iter_count(ii) && count < fc->max_write &&
ap->num_pages < max_pages && offset == 0);
nr_pages < max_pages && offset == 0);
return count > 0 ? count : err;
}
@ -1280,8 +1316,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
fc->max_pages);
ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
if (!ap->pages) {
ap->folios = fuse_folios_alloc(nr_pages, GFP_KERNEL, &ap->descs);
if (!ap->folios) {
err = -ENOMEM;
break;
}
@ -1303,7 +1339,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
err = -EIO;
}
}
kfree(ap->pages);
kfree(ap->folios);
} while (!err && iov_iter_count(ii));
fuse_write_update_attr(inode, pos, res);
@ -1430,11 +1466,7 @@ writethrough:
task_io_account_write(count);
err = file_remove_privs(file);
if (err)
goto out;
err = file_update_time(file);
err = kiocb_modified(iocb);
if (err)
goto out;
@ -1468,35 +1500,57 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
size_t *nbytesp, int write,
unsigned int max_pages)
unsigned int max_pages,
bool use_pages_for_kvec_io)
{
bool flush_or_invalidate = false;
unsigned int nr_pages = 0;
size_t nbytes = 0; /* # bytes already packed in req */
ssize_t ret = 0;
/* Special case for kernel I/O: can copy directly into the buffer */
/* Special case for kernel I/O: can copy directly into the buffer.
* However if the implementation of fuse_conn requires pages instead of
* pointer (e.g., virtio-fs), use iov_iter_extract_pages() instead.
*/
if (iov_iter_is_kvec(ii)) {
unsigned long user_addr = fuse_get_user_addr(ii);
size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
void *user_addr = (void *)fuse_get_user_addr(ii);
if (write)
ap->args.in_args[1].value = (void *) user_addr;
else
ap->args.out_args[0].value = (void *) user_addr;
if (!use_pages_for_kvec_io) {
size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
iov_iter_advance(ii, frag_size);
*nbytesp = frag_size;
return 0;
if (write)
ap->args.in_args[1].value = user_addr;
else
ap->args.out_args[0].value = user_addr;
iov_iter_advance(ii, frag_size);
*nbytesp = frag_size;
return 0;
}
if (is_vmalloc_addr(user_addr)) {
ap->args.vmap_base = user_addr;
flush_or_invalidate = true;
}
}
while (nbytes < *nbytesp && ap->num_pages < max_pages) {
unsigned npages;
size_t start;
struct page **pt_pages;
/*
* Until there is support for iov_iter_extract_folios(), we have to
* manually extract pages using iov_iter_extract_pages() and then
* copy that to a folios array.
*/
struct page **pages = kzalloc(max_pages * sizeof(struct page *),
GFP_KERNEL);
if (!pages)
return -ENOMEM;
pt_pages = &ap->pages[ap->num_pages];
ret = iov_iter_extract_pages(ii, &pt_pages,
while (nbytes < *nbytesp && nr_pages < max_pages) {
unsigned nfolios, i;
size_t start;
ret = iov_iter_extract_pages(ii, &pages,
*nbytesp - nbytes,
max_pages - ap->num_pages,
max_pages - nr_pages,
0, &start);
if (ret < 0)
break;
@ -1504,16 +1558,25 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
nbytes += ret;
ret += start;
npages = DIV_ROUND_UP(ret, PAGE_SIZE);
/* Currently, all folios in FUSE are one page */
nfolios = DIV_ROUND_UP(ret, PAGE_SIZE);
ap->descs[ap->num_pages].offset = start;
fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
ap->descs[ap->num_folios].offset = start;
fuse_folio_descs_length_init(ap->descs, ap->num_folios, nfolios);
for (i = 0; i < nfolios; i++)
ap->folios[i + ap->num_folios] = page_folio(pages[i]);
ap->num_pages += npages;
ap->descs[ap->num_pages - 1].length -=
ap->num_folios += nfolios;
ap->descs[ap->num_folios - 1].length -=
(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
nr_pages += nfolios;
}
kfree(pages);
if (write && flush_or_invalidate)
flush_kernel_vmap_range(ap->args.vmap_base, nbytes);
ap->args.invalidate_vmap = !write && flush_or_invalidate;
ap->args.is_pinned = iov_iter_extract_will_pin(ii);
ap->args.user_pages = true;
if (write)
@ -1582,7 +1645,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
size_t nbytes = min(count, nmax);
err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
max_pages);
max_pages, fc->use_pages_for_kvec_io);
if (err && !nbytes)
break;
@ -1596,7 +1659,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
}
if (!io->async || nres < 0) {
fuse_release_user_pages(&ia->ap, io->should_dirty);
fuse_release_user_pages(&ia->ap, nres, io->should_dirty);
fuse_io_free(ia);
}
ia = NULL;
@ -1650,7 +1713,7 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
ssize_t res;
if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
if (!is_sync_kiocb(iocb)) {
res = fuse_direct_IO(iocb, to);
} else {
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
@ -1664,7 +1727,6 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
ssize_t res;
bool exclusive;
@ -1672,9 +1734,11 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
res = generic_write_checks(iocb, from);
if (res > 0) {
task_io_account_write(res);
if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
if (!is_sync_kiocb(iocb)) {
res = fuse_direct_IO(iocb, from);
} else {
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
res = fuse_direct_io(&io, from, &iocb->ki_pos,
FUSE_DIO_WRITE);
fuse_write_update_attr(inode, iocb->ki_pos, res);
@ -1760,21 +1824,21 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
if (wpa->bucket)
fuse_sync_bucket_dec(wpa->bucket);
for (i = 0; i < ap->num_pages; i++)
__free_page(ap->pages[i]);
for (i = 0; i < ap->num_folios; i++)
folio_put(ap->folios[i]);
fuse_file_put(wpa->ia.ff, false);
kfree(ap->pages);
kfree(ap->folios);
kfree(wpa);
}
static void fuse_writepage_finish_stat(struct inode *inode, struct page *page)
static void fuse_writepage_finish_stat(struct inode *inode, struct folio *folio)
{
struct backing_dev_info *bdi = inode_to_bdi(inode);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(page, NR_WRITEBACK_TEMP);
node_stat_sub_folio(folio, NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
}
@ -1785,8 +1849,8 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
struct fuse_inode *fi = get_fuse_inode(inode);
int i;
for (i = 0; i < ap->num_pages; i++)
fuse_writepage_finish_stat(inode, ap->pages[i]);
for (i = 0; i < ap->num_folios; i++)
fuse_writepage_finish_stat(inode, ap->folios[i]);
wake_up(&fi->page_waitq);
}
@ -1801,7 +1865,8 @@ __acquires(fi->lock)
struct fuse_inode *fi = get_fuse_inode(wpa->inode);
struct fuse_write_in *inarg = &wpa->ia.write.in;
struct fuse_args *args = &wpa->ia.ap.args;
__u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
/* Currently, all folios in FUSE are one page */
__u64 data_size = wpa->ia.ap.num_folios * PAGE_SIZE;
int err;
fi->writectr++;
@ -1841,7 +1906,8 @@ __acquires(fi->lock)
for (aux = wpa->next; aux; aux = next) {
next = aux->next;
aux->next = NULL;
fuse_writepage_finish_stat(aux->inode, aux->ia.ap.pages[0]);
fuse_writepage_finish_stat(aux->inode,
aux->ia.ap.folios[0]);
fuse_writepage_free(aux);
}
@ -1876,11 +1942,11 @@ static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
struct fuse_writepage_args *wpa)
{
pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
pgoff_t idx_to = idx_from + wpa->ia.ap.num_folios - 1;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
WARN_ON(!wpa->ia.ap.num_pages);
WARN_ON(!wpa->ia.ap.num_folios);
while (*p) {
struct fuse_writepage_args *curr;
pgoff_t curr_index;
@ -1891,7 +1957,7 @@ static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
WARN_ON(curr->inode != wpa->inode);
curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
if (idx_from >= curr_index + curr->ia.ap.num_pages)
if (idx_from >= curr_index + curr->ia.ap.num_folios)
p = &(*p)->rb_right;
else if (idx_to < curr_index)
p = &(*p)->rb_left;
@ -2023,9 +2089,9 @@ static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
if (wpa) {
ap = &wpa->ia.ap;
ap->num_pages = 0;
ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
if (!ap->pages) {
ap->num_folios = 0;
ap->folios = fuse_folios_alloc(1, GFP_NOFS, &ap->descs);
if (!ap->folios) {
kfree(wpa);
wpa = NULL;
}
@ -2049,19 +2115,19 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
}
static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
struct folio *tmp_folio, uint32_t page_index)
struct folio *tmp_folio, uint32_t folio_index)
{
struct inode *inode = folio->mapping->host;
struct fuse_args_pages *ap = &wpa->ia.ap;
folio_copy(tmp_folio, folio);
ap->pages[page_index] = &tmp_folio->page;
ap->descs[page_index].offset = 0;
ap->descs[page_index].length = PAGE_SIZE;
ap->folios[folio_index] = tmp_folio;
ap->descs[folio_index].offset = 0;
ap->descs[folio_index].length = PAGE_SIZE;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_node_page_state(&tmp_folio->page, NR_WRITEBACK_TEMP);
node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
}
static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
@ -2115,7 +2181,7 @@ static int fuse_writepage_locked(struct folio *folio)
goto err_writepage_args;
ap = &wpa->ia.ap;
ap->num_pages = 1;
ap->num_folios = 1;
folio_start_writeback(folio);
fuse_writepage_args_page_fill(wpa, folio, tmp_folio, 0);
@ -2143,32 +2209,32 @@ struct fuse_fill_wb_data {
struct fuse_writepage_args *wpa;
struct fuse_file *ff;
struct inode *inode;
struct page **orig_pages;
unsigned int max_pages;
struct folio **orig_folios;
unsigned int max_folios;
};
static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
{
struct fuse_args_pages *ap = &data->wpa->ia.ap;
struct fuse_conn *fc = get_fuse_conn(data->inode);
struct page **pages;
struct fuse_page_desc *descs;
unsigned int npages = min_t(unsigned int,
max_t(unsigned int, data->max_pages * 2,
FUSE_DEFAULT_MAX_PAGES_PER_REQ),
struct folio **folios;
struct fuse_folio_desc *descs;
unsigned int nfolios = min_t(unsigned int,
max_t(unsigned int, data->max_folios * 2,
FUSE_DEFAULT_MAX_PAGES_PER_REQ),
fc->max_pages);
WARN_ON(npages <= data->max_pages);
WARN_ON(nfolios <= data->max_folios);
pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
if (!pages)
folios = fuse_folios_alloc(nfolios, GFP_NOFS, &descs);
if (!folios)
return false;
memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
kfree(ap->pages);
ap->pages = pages;
memcpy(folios, ap->folios, sizeof(struct folio *) * ap->num_folios);
memcpy(descs, ap->descs, sizeof(struct fuse_folio_desc) * ap->num_folios);
kfree(ap->folios);
ap->folios = folios;
ap->descs = descs;
data->max_pages = npages;
data->max_folios = nfolios;
return true;
}
@ -2178,7 +2244,7 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
struct fuse_writepage_args *wpa = data->wpa;
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
int num_pages = wpa->ia.ap.num_pages;
int num_folios = wpa->ia.ap.num_folios;
int i;
spin_lock(&fi->lock);
@ -2186,8 +2252,8 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
for (i = 0; i < num_pages; i++)
end_page_writeback(data->orig_pages[i]);
for (i = 0; i < num_folios; i++)
folio_end_writeback(data->orig_folios[i]);
}
/*
@ -2198,15 +2264,15 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
* swapping the new temp page with the old one.
*/
static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
struct page *page)
struct folio *folio)
{
struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
struct fuse_writepage_args *tmp;
struct fuse_writepage_args *old_wpa;
struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
WARN_ON(new_ap->num_pages != 0);
new_ap->num_pages = 1;
WARN_ON(new_ap->num_folios != 0);
new_ap->num_folios = 1;
spin_lock(&fi->lock);
old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
@ -2220,9 +2286,9 @@ static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
WARN_ON(tmp->inode != new_wpa->inode);
curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
if (curr_index == page->index) {
WARN_ON(tmp->ia.ap.num_pages != 1);
swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
if (curr_index == folio->index) {
WARN_ON(tmp->ia.ap.num_folios != 1);
swap(tmp->ia.ap.folios[0], new_ap->folios[0]);
break;
}
}
@ -2235,18 +2301,19 @@ static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
spin_unlock(&fi->lock);
if (tmp) {
fuse_writepage_finish_stat(new_wpa->inode, new_ap->pages[0]);
fuse_writepage_finish_stat(new_wpa->inode,
folio);
fuse_writepage_free(new_wpa);
}
return false;
}
static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
struct fuse_args_pages *ap,
struct fuse_fill_wb_data *data)
{
WARN_ON(!ap->num_pages);
WARN_ON(!ap->num_folios);
/*
* Being under writeback is unlikely but possible. For example direct
@ -2254,23 +2321,23 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
* the pages are faulted with get_user_pages(), and then after the read
* completed.
*/
if (fuse_page_is_writeback(data->inode, page->index))
if (fuse_folio_is_writeback(data->inode, folio))
return true;
/* Reached max pages */
if (ap->num_pages == fc->max_pages)
if (ap->num_folios == fc->max_pages)
return true;
/* Reached max write bytes */
if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
if ((ap->num_folios + 1) * PAGE_SIZE > fc->max_write)
return true;
/* Discontinuity */
if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
if (data->orig_folios[ap->num_folios - 1]->index + 1 != folio_index(folio))
return true;
/* Need to grow the pages array? If so, did the expansion fail? */
if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
if (ap->num_folios == data->max_folios && !fuse_pages_realloc(data))
return true;
return false;
@ -2295,7 +2362,7 @@ static int fuse_writepages_fill(struct folio *folio,
goto out_unlock;
}
if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) {
if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) {
fuse_writepages_send(data);
data->wpa = NULL;
}
@ -2314,7 +2381,7 @@ static int fuse_writepages_fill(struct folio *folio,
* This is ensured by holding the page lock in page_mkwrite() while
* checking fuse_page_is_writeback(). We already hold the page lock
* since clear_page_dirty_for_io() and keep it held until we add the
* request to the fi->writepages list and increment ap->num_pages.
* request to the fi->writepages list and increment ap->num_folios.
* After this fuse_page_is_writeback() will indicate that the page is
* under writeback, so we can release the page lock.
*/
@ -2326,13 +2393,13 @@ static int fuse_writepages_fill(struct folio *folio,
goto out_unlock;
}
fuse_file_get(wpa->ia.ff);
data->max_pages = 1;
data->max_folios = 1;
ap = &wpa->ia.ap;
}
folio_start_writeback(folio);
fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_pages);
data->orig_pages[ap->num_pages] = &folio->page;
fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_folios);
data->orig_folios[ap->num_folios] = folio;
err = 0;
if (data->wpa) {
@ -2341,9 +2408,9 @@ static int fuse_writepages_fill(struct folio *folio,
* fuse_page_is_writeback().
*/
spin_lock(&fi->lock);
ap->num_pages++;
ap->num_folios++;
spin_unlock(&fi->lock);
} else if (fuse_writepage_add(wpa, &folio->page)) {
} else if (fuse_writepage_add(wpa, folio)) {
data->wpa = wpa;
} else {
folio_end_writeback(folio);
@ -2375,21 +2442,21 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL;
err = -ENOMEM;
data.orig_pages = kcalloc(fc->max_pages,
sizeof(struct page *),
GFP_NOFS);
if (!data.orig_pages)
data.orig_folios = kcalloc(fc->max_pages,
sizeof(struct folio *),
GFP_NOFS);
if (!data.orig_folios)
goto out;
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
if (data.wpa) {
WARN_ON(!data.wpa->ia.ap.num_pages);
WARN_ON(!data.wpa->ia.ap.num_folios);
fuse_writepages_send(&data);
}
if (data.ff)
fuse_file_put(data.ff, false);
kfree(data.orig_pages);
kfree(data.orig_folios);
out:
return err;
}
@ -2429,7 +2496,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
folio_zero_segment(folio, 0, off);
goto success;
}
err = fuse_do_readpage(file, &folio->page);
err = fuse_do_readfolio(file, folio);
if (err)
goto cleanup;
success:
@ -2518,17 +2585,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
*/
static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
file_update_time(vmf->vma->vm_file);
lock_page(page);
if (page->mapping != inode->i_mapping) {
unlock_page(page);
folio_lock(folio);
if (folio->mapping != inode->i_mapping) {
folio_unlock(folio);
return VM_FAULT_NOPAGE;
}
fuse_wait_on_page_writeback(inode, page->index);
fuse_wait_on_folio_writeback(inode, folio);
return VM_FAULT_LOCKED;
}

View File

@ -35,9 +35,6 @@
/** Default max number of pages that can be used in a single read request */
#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
/** Maximum of max_pages received in init_out */
#define FUSE_MAX_MAX_PAGES 256
/** Bias for fi->writectr, meaning new writepages must not be sent */
#define FUSE_NOWRITE INT_MIN
@ -47,6 +44,9 @@
/** Number of dentries for each connection in the control filesystem */
#define FUSE_CTL_NUM_DENTRIES 5
/** Maximum of max_pages received in init_out */
extern unsigned int fuse_max_pages_limit;
/** List of active connections */
extern struct list_head fuse_conn_list;
@ -285,8 +285,8 @@ struct fuse_arg {
void *value;
};
/** FUSE page descriptor */
struct fuse_page_desc {
/** FUSE folio descriptor */
struct fuse_folio_desc {
unsigned int length;
unsigned int offset;
};
@ -309,16 +309,19 @@ struct fuse_args {
bool may_block:1;
bool is_ext:1;
bool is_pinned:1;
bool invalidate_vmap:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
/* Used for kvec iter backed by vmalloc address */
void *vmap_base;
};
struct fuse_args_pages {
struct fuse_args args;
struct page **pages;
struct fuse_page_desc *descs;
unsigned int num_pages;
struct folio **folios;
struct fuse_folio_desc *descs;
unsigned int num_folios;
};
struct fuse_release_args {
@ -857,6 +860,9 @@ struct fuse_conn {
/** Passthrough support for read/write IO */
unsigned int passthrough:1;
/* Use pages instead of pointer for kernel I/O */
unsigned int use_pages_for_kvec_io:1;
/** Maximum stack depth for passthrough backing files */
int max_stack_depth;
@ -884,6 +890,9 @@ struct fuse_conn {
/** Version counter for attribute changes */
atomic64_t attr_version;
/** Version counter for evict inode */
atomic64_t evict_ctr;
/** Called on final put */
void (*release)(struct fuse_conn *);
@ -978,6 +987,11 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
return atomic64_read(&fc->attr_version);
}
/*
 * Snapshot the connection's eviction counter.  The caller compares the
 * snapshot against a later read to detect inode evictions that raced
 * with an outstanding request (see the evict_ctr bump in
 * fuse_evict_inode() and the staleness check in
 * fuse_change_attributes_common()).
 */
static inline u64 fuse_get_evict_ctr(struct fuse_conn *fc)
{
	return atomic64_read(&fc->evict_ctr);
}
static inline bool fuse_stale_inode(const struct inode *inode, int generation,
struct fuse_attr *attr)
{
@ -995,25 +1009,25 @@ static inline bool fuse_is_bad(struct inode *inode)
return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
}
static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
struct fuse_page_desc **desc)
static inline struct folio **fuse_folios_alloc(unsigned int nfolios, gfp_t flags,
struct fuse_folio_desc **desc)
{
struct page **pages;
struct folio **folios;
pages = kzalloc(npages * (sizeof(struct page *) +
sizeof(struct fuse_page_desc)), flags);
*desc = (void *) (pages + npages);
folios = kzalloc(nfolios * (sizeof(struct folio *) +
sizeof(struct fuse_folio_desc)), flags);
*desc = (void *) (folios + nfolios);
return pages;
return folios;
}
static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
unsigned int index,
unsigned int nr_pages)
/*
 * Initialize the length of @nr_folios descriptors starting at @index so
 * that each one covers the remainder of its page past the descriptor's
 * offset (descriptors are currently one page each).
 */
static inline void fuse_folio_descs_length_init(struct fuse_folio_desc *descs,
						unsigned int index,
						unsigned int nr_folios)
{
	unsigned int i;

	for (i = 0; i < nr_folios; i++) {
		struct fuse_folio_desc *desc = &descs[index + i];

		desc->length = PAGE_SIZE - desc->offset;
	}
}
@ -1037,7 +1051,8 @@ extern const struct dentry_operations fuse_root_dentry_operations;
*/
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version);
u64 attr_valid, u64 attr_version,
u64 evict_ctr);
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode);
@ -1062,7 +1077,7 @@ struct fuse_io_args {
struct {
struct fuse_write_in in;
struct fuse_write_out out;
bool page_locked;
bool folio_locked;
} write;
};
struct fuse_args_pages ap;
@ -1127,7 +1142,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
u64 attr_valid, u32 cache_mask);
u64 attr_valid, u32 cache_mask,
u64 evict_ctr);
u32 fuse_get_cache_mask(struct inode *inode);
@ -1480,4 +1496,12 @@ ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
#ifdef CONFIG_SYSCTL
extern int fuse_sysctl_register(void);
extern void fuse_sysctl_unregister(void);
#else
#define fuse_sysctl_register() (0)
#define fuse_sysctl_unregister() do { } while (0)
#endif /* CONFIG_SYSCTL */
#endif /* _FS_FUSE_I_H */

View File

@ -35,6 +35,8 @@ DEFINE_MUTEX(fuse_mutex);
static int set_global_limit(const char *val, const struct kernel_param *kp);
unsigned int fuse_max_pages_limit = 256;
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
&max_user_bgreq, 0644);
@ -173,6 +175,14 @@ static void fuse_evict_inode(struct inode *inode)
fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
fi->submount_lookup = NULL;
}
/*
* Evict of non-deleted inode may race with outstanding
* LOOKUP/READDIRPLUS requests and result in inconsistency when
* the request finishes. Deal with that here by bumping a
* counter that can be compared to the starting value.
*/
if (inode->i_nlink > 0)
atomic64_inc(&fc->evict_ctr);
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
WARN_ON(fi->iocachectr != 0);
@ -206,17 +216,30 @@ static ino_t fuse_squash_ino(u64 ino64)
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
u64 attr_valid, u32 cache_mask)
u64 attr_valid, u32 cache_mask,
u64 evict_ctr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
lockdep_assert_held(&fi->lock);
/*
* Clear basic stats from invalid mask.
*
* Don't do this if this is coming from a fuse_iget() call and there
* might have been a racing evict which would've invalidated the result
* if the attr_version would've been preserved.
*
* !evict_ctr -> this is create
* fi->attr_version != 0 -> this is not a new inode
	 * evict_ctr == fuse_get_evict_ctr() -> no evicts occurred during the request
*/
if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
fi->attr_version = atomic64_inc_return(&fc->attr_version);
fi->i_time = attr_valid;
/* Clear basic stats from invalid mask */
set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
@ -295,9 +318,9 @@ u32 fuse_get_cache_mask(struct inode *inode)
return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
u64 attr_valid, u64 attr_version)
static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx, u64 attr_valid,
u64 attr_version, u64 evict_ctr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
@ -331,7 +354,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
}
old_mtime = inode_get_mtime(inode);
fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask,
evict_ctr);
oldsize = inode->i_size;
/*
@ -372,6 +396,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_dax_dontcache(inode, attr->flags);
}
/*
 * Public wrapper around fuse_change_attributes_i() for callers outside
 * the fuse_iget() path.  Passes 0 as the evict_ctr, which makes
 * fuse_change_attributes_common() unconditionally clear the basic stats
 * from the invalid mask (the eviction-race staleness check is only
 * needed when attributes arrive via fuse_iget()).
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_statx *sx, u64 attr_valid,
			    u64 attr_version)
{
	fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
}
static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
u64 nodeid)
{
@ -426,7 +457,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version)
u64 attr_valid, u64 attr_version,
u64 evict_ctr)
{
struct inode *inode;
struct fuse_inode *fi;
@ -487,8 +519,8 @@ retry:
fi->nlookup++;
spin_unlock(&fi->lock);
done:
fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
evict_ctr);
return inode;
}
@ -940,11 +972,12 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
fc->initialized = 0;
fc->connected = 1;
atomic64_set(&fc->attr_version, 1);
atomic64_set(&fc->evict_ctr, 1);
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
fc->max_pages_limit = fuse_max_pages_limit;
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
fuse_backing_files_init(fc);
@ -1001,7 +1034,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
attr.mode = mode;
attr.ino = FUSE_ROOT_ID;
attr.nlink = 1;
return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
}
struct fuse_inode_handle {
@ -1610,7 +1643,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
return -ENOMEM;
fuse_fill_attr_from_inode(&root_attr, parent_fi);
root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
fuse_get_evict_ctr(fm->fc));
/*
* This inode is just a duplicate, so it is not looked up and
* its nlookup should not be incremented. fuse_iget() does
@ -2063,8 +2097,14 @@ static int __init fuse_fs_init(void)
if (err)
goto out3;
err = fuse_sysctl_register();
if (err)
goto out4;
return 0;
out4:
unregister_filesystem(&fuse_fs_type);
out3:
unregister_fuseblk();
out2:
@ -2075,6 +2115,7 @@ static int __init fuse_fs_init(void)
static void fuse_fs_cleanup(void)
{
fuse_sysctl_unregister();
unregister_filesystem(&fuse_fs_type);
unregister_fuseblk();

View File

@ -10,6 +10,8 @@
#include <linux/fileattr.h>
#include <linux/fsverity.h>
#define FUSE_VERITY_ENABLE_ARG_MAX_PAGES 256
static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_ioctl_out *outarg)
{
@ -140,7 +142,7 @@ static int fuse_setup_enable_verity(unsigned long arg, struct iovec *iov,
{
struct fsverity_enable_arg enable;
struct fsverity_enable_arg __user *uarg = (void __user *)arg;
const __u32 max_buffer_len = FUSE_MAX_MAX_PAGES * PAGE_SIZE;
const __u32 max_buffer_len = FUSE_VERITY_ENABLE_ARG_MAX_PAGES * PAGE_SIZE;
if (copy_from_user(&enable, uarg, sizeof(enable)))
return -EFAULT;
@ -249,12 +251,12 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
ap.folios = fuse_folios_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!ap.pages || !iov_page)
if (!ap.folios || !iov_page)
goto out;
fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
fuse_folio_descs_length_init(ap.descs, 0, fm->fc->max_pages);
/*
* If restricted, initialize IO parameters as encoded in @cmd.
@ -304,14 +306,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -ENOMEM;
if (max_pages > fm->fc->max_pages)
goto out;
while (ap.num_pages < max_pages) {
ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
if (!ap.pages[ap.num_pages])
while (ap.num_folios < max_pages) {
ap.folios[ap.num_folios] = folio_alloc(GFP_KERNEL | __GFP_HIGHMEM, 0);
if (!ap.folios[ap.num_folios])
goto out;
ap.num_pages++;
ap.num_folios++;
}
/* okay, let's send it to the client */
ap.args.opcode = FUSE_IOCTL;
ap.args.nodeid = ff->nodeid;
@ -325,8 +326,8 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -EFAULT;
iov_iter_init(&ii, ITER_SOURCE, in_iov, in_iovs, in_size);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) {
c = copy_folio_from_iter(ap.folios[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
goto out;
}
@ -364,7 +365,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
goto out;
vaddr = kmap_local_page(ap.pages[0]);
vaddr = kmap_local_folio(ap.folios[0], 0);
err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
transferred, in_iovs + out_iovs,
(flags & FUSE_IOCTL_COMPAT) != 0);
@ -392,17 +393,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -EFAULT;
iov_iter_init(&ii, ITER_DEST, out_iov, out_iovs, transferred);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) {
c = copy_folio_to_iter(ap.folios[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
goto out;
}
err = 0;
out:
free_page((unsigned long) iov_page);
while (ap.num_pages)
__free_page(ap.pages[--ap.num_pages]);
kfree(ap.pages);
while (ap.num_folios)
folio_put(ap.folios[--ap.num_folios]);
kfree(ap.folios);
return err ? err : outarg.result;
}

View File

@ -149,7 +149,7 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
u64 attr_version)
u64 attr_version, u64 evict_ctr)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
@ -233,7 +233,7 @@ retry:
} else {
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, ATTR_TIMEOUT(o),
attr_version);
attr_version, evict_ctr);
if (!inode)
inode = ERR_PTR(-ENOMEM);
@ -284,7 +284,8 @@ static void fuse_force_forget(struct file *file, u64 nodeid)
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx, u64 attr_version)
struct dir_context *ctx, u64 attr_version,
u64 evict_ctr)
{
struct fuse_direntplus *direntplus;
struct fuse_dirent *dirent;
@ -319,7 +320,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
buf += reclen;
nbytes -= reclen;
ret = fuse_direntplus_link(file, direntplus, attr_version);
ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
if (ret)
fuse_force_forget(file, direntplus->entry_out.nodeid);
}
@ -331,26 +332,27 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
int plus;
ssize_t res;
struct page *page;
struct folio *folio;
struct inode *inode = file_inode(file);
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
struct fuse_page_desc desc = { .length = PAGE_SIZE };
u64 attr_version = 0;
struct fuse_folio_desc desc = { .length = PAGE_SIZE };
u64 attr_version = 0, evict_ctr = 0;
bool locked;
page = alloc_page(GFP_KERNEL);
if (!page)
folio = folio_alloc(GFP_KERNEL, 0);
if (!folio)
return -ENOMEM;
plus = fuse_use_readdirplus(inode, ctx);
ap->args.out_pages = true;
ap->num_pages = 1;
ap->pages = &page;
ap->num_folios = 1;
ap->folios = &folio;
ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fm->fc);
evict_ctr = fuse_get_evict_ctr(fm->fc);
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
@ -367,15 +369,16 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
res = parse_dirplusfile(page_address(page), res,
file, ctx, attr_version);
res = parse_dirplusfile(folio_address(folio), res,
file, ctx, attr_version,
evict_ctr);
} else {
res = parse_dirfile(page_address(page), res, file,
res = parse_dirfile(folio_address(folio), res, file,
ctx);
}
}
__free_page(page);
folio_put(folio);
fuse_invalidate_atime(inode);
return res;
}

40
fs/fuse/sysctl.c Normal file
View File

@ -0,0 +1,40 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/fuse/sysctl.c
*
* Sysctl interface to fuse parameters
*/
#include <linux/sysctl.h>
#include "fuse_i.h"
static struct ctl_table_header *fuse_table_header;
/* Bound by fuse_init_out max_pages, which is a u16 */
static unsigned int sysctl_fuse_max_pages_limit = 65535;
/*
 * fs/fuse/max_pages_limit: backs the fuse_max_pages_limit global, which
 * seeds fc->max_pages_limit at connection init.  Writes are bounded to
 * [1, sysctl_fuse_max_pages_limit] by proc_douintvec_minmax.
 */
static struct ctl_table fuse_sysctl_table[] = {
	{
		.procname	= "max_pages_limit",
		.data		= &fuse_max_pages_limit,
		.maxlen		= sizeof(fuse_max_pages_limit),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &sysctl_fuse_max_pages_limit,
	},
};
/*
 * Register the fuse sysctl table under "fs/fuse".
 * Returns 0 on success, -ENOMEM if registration failed.
 */
int fuse_sysctl_register(void)
{
	fuse_table_header = register_sysctl("fs/fuse", fuse_sysctl_table);

	return fuse_table_header ? 0 : -ENOMEM;
}
/*
 * Tear down the sysctl table registered by fuse_sysctl_register().
 * Clears the header pointer afterwards so a stale handle is never reused.
 */
void fuse_sysctl_unregister(void)
{
	unregister_sysctl_table(fuse_table_header);
	fuse_table_header = NULL;
}

View File

@ -97,7 +97,8 @@ struct virtio_fs_req_work {
};
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight);
struct fuse_req *req, bool in_flight,
gfp_t gfp);
static const struct constant_table dax_param_enums[] = {
{"always", FUSE_DAX_ALWAYS },
@ -575,6 +576,8 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
/* Dispatch pending requests */
while (1) {
unsigned int flags;
spin_lock(&fsvq->lock);
req = list_first_entry_or_null(&fsvq->queued_reqs,
struct fuse_req, list);
@ -585,7 +588,9 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
list_del_init(&req->list);
spin_unlock(&fsvq->lock);
ret = virtio_fs_enqueue_req(fsvq, req, true);
flags = memalloc_nofs_save();
ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL);
memalloc_nofs_restore(flags);
if (ret < 0) {
if (ret == -ENOSPC) {
spin_lock(&fsvq->lock);
@ -686,7 +691,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
}
/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp)
{
struct fuse_args *args = req->args;
unsigned int offset = 0;
@ -700,7 +705,7 @@ static int copy_args_to_argbuf(struct fuse_req *req)
len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
fuse_len_args(num_out, args->out_args);
req->argbuf = kmalloc(len, GFP_ATOMIC);
req->argbuf = kmalloc(len, gfp);
if (!req->argbuf)
return -ENOMEM;
@ -760,7 +765,7 @@ static void virtio_fs_request_complete(struct fuse_req *req,
struct fuse_args *args;
struct fuse_args_pages *ap;
unsigned int len, i, thislen;
struct page *page;
struct folio *folio;
/*
* TODO verify that server properly follows FUSE protocol
@ -772,12 +777,12 @@ static void virtio_fs_request_complete(struct fuse_req *req,
if (args->out_pages && args->page_zeroing) {
len = args->out_args[args->out_numargs - 1].size;
ap = container_of(args, typeof(*ap), args);
for (i = 0; i < ap->num_pages; i++) {
for (i = 0; i < ap->num_folios; i++) {
thislen = ap->descs[i].length;
if (len < thislen) {
WARN_ON(ap->descs[i].offset);
page = ap->pages[i];
zero_user_segment(page, len, thislen);
folio = ap->folios[i];
folio_zero_segment(folio, len, thislen);
len = 0;
} else {
len -= thislen;
@ -1267,15 +1272,15 @@ static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *r
}
/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
unsigned int num_pages,
unsigned int total_len)
static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs,
unsigned int num_folios,
unsigned int total_len)
{
unsigned int i;
unsigned int this_len;
for (i = 0; i < num_pages && total_len; i++) {
this_len = min(page_descs[i].length, total_len);
for (i = 0; i < num_folios && total_len; i++) {
this_len = min(folio_descs[i].length, total_len);
total_len -= this_len;
}
@ -1294,8 +1299,8 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req)
if (args->in_pages) {
size = args->in_args[args->in_numargs - 1].size;
total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
size);
total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
size);
}
if (!test_bit(FR_ISREPLY, &req->flags))
@ -1308,27 +1313,27 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req)
if (args->out_pages) {
size = args->out_args[args->out_numargs - 1].size;
total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
size);
total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
size);
}
return total_sgs;
}
/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
struct page **pages,
struct fuse_page_desc *page_descs,
unsigned int num_pages,
unsigned int total_len)
/* Add folios to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_folios(struct scatterlist *sg,
struct folio **folios,
struct fuse_folio_desc *folio_descs,
unsigned int num_folios,
unsigned int total_len)
{
unsigned int i;
unsigned int this_len;
for (i = 0; i < num_pages && total_len; i++) {
for (i = 0; i < num_folios && total_len; i++) {
sg_init_table(&sg[i], 1);
this_len = min(page_descs[i].length, total_len);
sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
this_len = min(folio_descs[i].length, total_len);
sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset);
total_len -= this_len;
}
@ -1353,10 +1358,10 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
sg_init_one(&sg[total_sgs++], argbuf, len);
if (argpages)
total_sgs += sg_init_fuse_pages(&sg[total_sgs],
ap->pages, ap->descs,
ap->num_pages,
args[numargs - 1].size);
total_sgs += sg_init_fuse_folios(&sg[total_sgs],
ap->folios, ap->descs,
ap->num_folios,
args[numargs - 1].size);
if (len_used)
*len_used = len;
@ -1366,7 +1371,8 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight)
struct fuse_req *req, bool in_flight,
gfp_t gfp)
{
/* requests need at least 4 elements */
struct scatterlist *stack_sgs[6];
@ -1387,8 +1393,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
/* Does the sglist fit on the stack? */
total_sgs = sg_count_fuse_req(req);
if (total_sgs > ARRAY_SIZE(stack_sgs)) {
sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp);
sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp);
if (!sgs || !sg) {
ret = -ENOMEM;
goto out;
@ -1396,7 +1402,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
}
/* Use a bounce buffer since stack args cannot be mapped */
ret = copy_args_to_argbuf(req);
ret = copy_args_to_argbuf(req, gfp);
if (ret < 0)
goto out;
@ -1490,7 +1496,7 @@ static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
queue_id);
fsvq = &fs->vqs[queue_id];
ret = virtio_fs_enqueue_req(fsvq, req, false);
ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC);
if (ret < 0) {
if (ret == -ENOSPC) {
/*
@ -1691,6 +1697,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
fc->delete_stale = true;
fc->auto_submounts = true;
fc->sync_fs = true;
fc->use_pages_for_kvec_io = true;
/* Tell FUSE to split requests that exceed the virtqueue's size */
fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,

View File

@ -2550,6 +2550,7 @@ struct kvec;
struct page *get_dump_page(unsigned long addr);
bool folio_mark_dirty(struct folio *folio);
bool folio_mark_dirty_lock(struct folio *folio);
bool set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);

View File

@ -52,6 +52,12 @@ bool set_page_dirty(struct page *page)
}
EXPORT_SYMBOL(set_page_dirty);
int set_page_dirty_lock(struct page *page)
{
return folio_mark_dirty_lock(page_folio(page));
}
EXPORT_SYMBOL(set_page_dirty_lock);
bool clear_page_dirty_for_io(struct page *page)
{
return folio_clear_dirty_for_io(page_folio(page));

View File

@ -2925,25 +2925,25 @@ bool folio_mark_dirty(struct folio *folio)
EXPORT_SYMBOL(folio_mark_dirty);
/*
* set_page_dirty() is racy if the caller has no reference against
* page->mapping->host, and if the page is unlocked. This is because another
* CPU could truncate the page off the mapping and then free the mapping.
* folio_mark_dirty() is racy if the caller has no reference against
* folio->mapping->host, and if the folio is unlocked. This is because another
* CPU could truncate the folio off the mapping and then free the mapping.
*
* Usually, the page _is_ locked, or the caller is a user-space process which
* Usually, the folio _is_ locked, or the caller is a user-space process which
* holds a reference on the inode by having an open file.
*
* In other cases, the page should be locked before running set_page_dirty().
* In other cases, the folio should be locked before running folio_mark_dirty().
*/
int set_page_dirty_lock(struct page *page)
bool folio_mark_dirty_lock(struct folio *folio)
{
int ret;
bool ret;
lock_page(page);
ret = set_page_dirty(page);
unlock_page(page);
folio_lock(folio);
ret = folio_mark_dirty(folio);
folio_unlock(folio);
return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);
EXPORT_SYMBOL(folio_mark_dirty_lock);
/*
* This cancels just the dirty bit on the kernel page itself, it does NOT