fuse update for 6.12

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQSQHSd0lITzzeNWNm3h3BK/laaZPAUCZvKlbgAKCRDh3BK/laaZ
 PLliAP9q5btlhlffnRg2LWCf4rIzbJ6vkORkc+GeyAXnWkIljQEA9En1K2vyg7Tk
 f9FvNQK9C+pS0GxURDRI7YedJ2f9FQ0=
 =wuY0
 -----END PGP SIGNATURE-----

Merge tag 'fuse-update-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Add support for idmapped fuse mounts (Alexander Mikhalitsyn)

 - Add optimization when checking for writeback (yangyun)

 - Add tracepoints (Josef Bacik)

 - Clean up writeback code (Joanne Koong)

 - Clean up request queuing (me)

 - Misc fixes

* tag 'fuse-update-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (32 commits)
  fuse: use exclusive lock when FUSE_I_CACHE_IO_MODE is set
  fuse: clear FR_PENDING if abort is detected when sending request
  fs/fuse: convert to use invalid_mnt_idmap
  fs/mnt_idmapping: introduce an invalid_mnt_idmap
  fs/fuse: introduce and use fuse_simple_idmap_request() helper
  fs/fuse: fix null-ptr-deref when checking SB_I_NOIDMAP flag
  fuse: allow O_PATH fd for FUSE_DEV_IOC_BACKING_OPEN
  virtio_fs: allow idmapped mounts
  fuse: allow idmapped mounts
  fuse: warn if fuse_access is called when idmapped mounts are allowed
  fuse: handle idmappings properly in ->write_iter()
  fuse: support idmapped ->rename op
  fuse: support idmapped ->set_acl
  fuse: drop idmap argument from __fuse_get_acl
  fuse: support idmapped ->setattr op
  fuse: support idmapped ->permission inode op
  fuse: support idmapped getattr inode op
  fuse: support idmap for mkdir/mknod/symlink/create/tmpfile
  fuse: support idmapped FUSE_EXT_GROUPS
  fuse: add an idmap argument to fuse_simple_request
  ...
This commit is contained in:
Linus Torvalds 2024-09-24 15:29:42 -07:00
commit f7fccaa772
15 changed files with 552 additions and 297 deletions

View File: fs/fuse/Makefile

@ -3,6 +3,9 @@
# Makefile for the FUSE filesystem. # Makefile for the FUSE filesystem.
# #
# Needed for trace events
ccflags-y = -I$(src)
obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o obj-$(CONFIG_VIRTIO_FS) += virtiofs.o

View File: fs/fuse/acl.c

@ -12,7 +12,6 @@
#include <linux/posix_acl_xattr.h> #include <linux/posix_acl_xattr.h>
static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc, static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc,
struct mnt_idmap *idmap,
struct inode *inode, int type, bool rcu) struct inode *inode, int type, bool rcu)
{ {
int size; int size;
@ -74,7 +73,7 @@ struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap,
if (fuse_no_acl(fc, inode)) if (fuse_no_acl(fc, inode))
return ERR_PTR(-EOPNOTSUPP); return ERR_PTR(-EOPNOTSUPP);
return __fuse_get_acl(fc, idmap, inode, type, false); return __fuse_get_acl(fc, inode, type, false);
} }
struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu) struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu)
@ -90,8 +89,7 @@ struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu)
*/ */
if (!fc->posix_acl) if (!fc->posix_acl)
return NULL; return NULL;
return __fuse_get_acl(fc, inode, type, rcu);
return __fuse_get_acl(fc, &nop_mnt_idmap, inode, type, rcu);
} }
int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
@ -146,8 +144,8 @@ int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
* be stripped. * be stripped.
*/ */
if (fc->posix_acl && if (fc->posix_acl &&
!in_group_or_capable(&nop_mnt_idmap, inode, !in_group_or_capable(idmap, inode,
i_gid_into_vfsgid(&nop_mnt_idmap, inode))) i_gid_into_vfsgid(idmap, inode)))
extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID; extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID;
ret = fuse_setxattr(inode, name, value, size, 0, extra_flags); ret = fuse_setxattr(inode, name, value, size, 0, extra_flags);

View File: fs/fuse/dev.c

@ -22,6 +22,9 @@
#include <linux/splice.h> #include <linux/splice.h>
#include <linux/sched.h> #include <linux/sched.h>
#define CREATE_TRACE_POINTS
#include "fuse_trace.h"
MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse"); MODULE_ALIAS("devname:fuse");
@ -105,11 +108,17 @@ static void fuse_drop_waiting(struct fuse_conn *fc)
static void fuse_put_request(struct fuse_req *req); static void fuse_put_request(struct fuse_req *req);
static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background) static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap,
struct fuse_mount *fm,
bool for_background)
{ {
struct fuse_conn *fc = fm->fc; struct fuse_conn *fc = fm->fc;
struct fuse_req *req; struct fuse_req *req;
bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP);
kuid_t fsuid;
kgid_t fsgid;
int err; int err;
atomic_inc(&fc->num_waiting); atomic_inc(&fc->num_waiting);
if (fuse_block_alloc(fc, for_background)) { if (fuse_block_alloc(fc, for_background)) {
@ -137,19 +146,32 @@ static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
goto out; goto out;
} }
req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
__set_bit(FR_WAITING, &req->flags); __set_bit(FR_WAITING, &req->flags);
if (for_background) if (for_background)
__set_bit(FR_BACKGROUND, &req->flags); __set_bit(FR_BACKGROUND, &req->flags);
if (unlikely(req->in.h.uid == ((uid_t)-1) || /*
* Keep the old behavior when idmappings support was not
* declared by a FUSE server.
*
* For those FUSE servers who support idmapped mounts,
* we send UID/GID only along with "inode creation"
* fuse requests, otherwise idmap == &invalid_mnt_idmap and
* req->in.h.{u,g}id will be equal to FUSE_INVALID_UIDGID.
*/
fsuid = no_idmap ? current_fsuid() : mapped_fsuid(idmap, fc->user_ns);
fsgid = no_idmap ? current_fsgid() : mapped_fsgid(idmap, fc->user_ns);
req->in.h.uid = from_kuid(fc->user_ns, fsuid);
req->in.h.gid = from_kgid(fc->user_ns, fsgid);
if (no_idmap && unlikely(req->in.h.uid == ((uid_t)-1) ||
req->in.h.gid == ((gid_t)-1))) { req->in.h.gid == ((gid_t)-1))) {
fuse_put_request(req); fuse_put_request(req);
return ERR_PTR(-EOVERFLOW); return ERR_PTR(-EOVERFLOW);
} }
return req; return req;
out: out:
@ -194,11 +216,22 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
} }
EXPORT_SYMBOL_GPL(fuse_len_args); EXPORT_SYMBOL_GPL(fuse_len_args);
u64 fuse_get_unique(struct fuse_iqueue *fiq) static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
{ {
fiq->reqctr += FUSE_REQ_ID_STEP; fiq->reqctr += FUSE_REQ_ID_STEP;
return fiq->reqctr; return fiq->reqctr;
} }
u64 fuse_get_unique(struct fuse_iqueue *fiq)
{
u64 ret;
spin_lock(&fiq->lock);
ret = fuse_get_unique_locked(fiq);
spin_unlock(&fiq->lock);
return ret;
}
EXPORT_SYMBOL_GPL(fuse_get_unique); EXPORT_SYMBOL_GPL(fuse_get_unique);
static unsigned int fuse_req_hash(u64 unique) static unsigned int fuse_req_hash(u64 unique)
@ -217,22 +250,70 @@ __releases(fiq->lock)
spin_unlock(&fiq->lock); spin_unlock(&fiq->lock);
} }
static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget)
{
spin_lock(&fiq->lock);
if (fiq->connected) {
fiq->forget_list_tail->next = forget;
fiq->forget_list_tail = forget;
fuse_dev_wake_and_unlock(fiq);
} else {
kfree(forget);
spin_unlock(&fiq->lock);
}
}
static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
spin_lock(&fiq->lock);
if (list_empty(&req->intr_entry)) {
list_add_tail(&req->intr_entry, &fiq->interrupts);
/*
* Pairs with smp_mb() implied by test_and_set_bit()
* from fuse_request_end().
*/
smp_mb();
if (test_bit(FR_FINISHED, &req->flags)) {
list_del_init(&req->intr_entry);
spin_unlock(&fiq->lock);
} else {
fuse_dev_wake_and_unlock(fiq);
}
} else {
spin_unlock(&fiq->lock);
}
}
static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
spin_lock(&fiq->lock);
if (fiq->connected) {
if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
req->in.h.unique = fuse_get_unique_locked(fiq);
list_add_tail(&req->list, &fiq->pending);
fuse_dev_wake_and_unlock(fiq);
} else {
spin_unlock(&fiq->lock);
req->out.h.error = -ENOTCONN;
clear_bit(FR_PENDING, &req->flags);
fuse_request_end(req);
}
}
const struct fuse_iqueue_ops fuse_dev_fiq_ops = { const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
.wake_forget_and_unlock = fuse_dev_wake_and_unlock, .send_forget = fuse_dev_queue_forget,
.wake_interrupt_and_unlock = fuse_dev_wake_and_unlock, .send_interrupt = fuse_dev_queue_interrupt,
.wake_pending_and_unlock = fuse_dev_wake_and_unlock, .send_req = fuse_dev_queue_req,
}; };
EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
static void queue_request_and_unlock(struct fuse_iqueue *fiq, static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
struct fuse_req *req)
__releases(fiq->lock)
{ {
req->in.h.len = sizeof(struct fuse_in_header) + req->in.h.len = sizeof(struct fuse_in_header) +
fuse_len_args(req->args->in_numargs, fuse_len_args(req->args->in_numargs,
(struct fuse_arg *) req->args->in_args); (struct fuse_arg *) req->args->in_args);
list_add_tail(&req->list, &fiq->pending); trace_fuse_request_send(req);
fiq->ops->wake_pending_and_unlock(fiq); fiq->ops->send_req(fiq, req);
} }
void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
@ -243,15 +324,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
forget->forget_one.nodeid = nodeid; forget->forget_one.nodeid = nodeid;
forget->forget_one.nlookup = nlookup; forget->forget_one.nlookup = nlookup;
spin_lock(&fiq->lock); fiq->ops->send_forget(fiq, forget);
if (fiq->connected) {
fiq->forget_list_tail->next = forget;
fiq->forget_list_tail = forget;
fiq->ops->wake_forget_and_unlock(fiq);
} else {
kfree(forget);
spin_unlock(&fiq->lock);
}
} }
static void flush_bg_queue(struct fuse_conn *fc) static void flush_bg_queue(struct fuse_conn *fc)
@ -265,9 +338,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
req = list_first_entry(&fc->bg_queue, struct fuse_req, list); req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
list_del(&req->list); list_del(&req->list);
fc->active_background++; fc->active_background++;
spin_lock(&fiq->lock); fuse_send_one(fiq, req);
req->in.h.unique = fuse_get_unique(fiq);
queue_request_and_unlock(fiq, req);
} }
} }
@ -288,6 +359,7 @@ void fuse_request_end(struct fuse_req *req)
if (test_and_set_bit(FR_FINISHED, &req->flags)) if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request; goto put_request;
trace_fuse_request_end(req);
/* /*
* test_and_set_bit() implies smp_mb() between bit * test_and_set_bit() implies smp_mb() between bit
* changing and below FR_INTERRUPTED check. Pairs with * changing and below FR_INTERRUPTED check. Pairs with
@ -337,29 +409,12 @@ static int queue_interrupt(struct fuse_req *req)
{ {
struct fuse_iqueue *fiq = &req->fm->fc->iq; struct fuse_iqueue *fiq = &req->fm->fc->iq;
spin_lock(&fiq->lock);
/* Check for we've sent request to interrupt this req */ /* Check for we've sent request to interrupt this req */
if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
spin_unlock(&fiq->lock);
return -EINVAL; return -EINVAL;
}
if (list_empty(&req->intr_entry)) { fiq->ops->send_interrupt(fiq, req);
list_add_tail(&req->intr_entry, &fiq->interrupts);
/*
* Pairs with smp_mb() implied by test_and_set_bit()
* from fuse_request_end().
*/
smp_mb();
if (test_bit(FR_FINISHED, &req->flags)) {
list_del_init(&req->intr_entry);
spin_unlock(&fiq->lock);
return 0;
}
fiq->ops->wake_interrupt_and_unlock(fiq);
} else {
spin_unlock(&fiq->lock);
}
return 0; return 0;
} }
@ -414,21 +469,15 @@ static void __fuse_request_send(struct fuse_req *req)
struct fuse_iqueue *fiq = &req->fm->fc->iq; struct fuse_iqueue *fiq = &req->fm->fc->iq;
BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
spin_lock(&fiq->lock);
if (!fiq->connected) { /* acquire extra reference, since request is still needed after
spin_unlock(&fiq->lock); fuse_request_end() */
req->out.h.error = -ENOTCONN;
} else {
req->in.h.unique = fuse_get_unique(fiq);
/* acquire extra reference, since request is still needed
after fuse_request_end() */
__fuse_get_request(req); __fuse_get_request(req);
queue_request_and_unlock(fiq, req); fuse_send_one(fiq, req);
request_wait_answer(req); request_wait_answer(req);
/* Pairs with smp_wmb() in fuse_request_end() */ /* Pairs with smp_wmb() in fuse_request_end() */
smp_rmb(); smp_rmb();
}
} }
static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
@ -468,8 +517,14 @@ static void fuse_force_creds(struct fuse_req *req)
{ {
struct fuse_conn *fc = req->fm->fc; struct fuse_conn *fc = req->fm->fc;
if (!req->fm->sb || req->fm->sb->s_iflags & SB_I_NOIDMAP) {
req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
} else {
req->in.h.uid = FUSE_INVALID_UIDGID;
req->in.h.gid = FUSE_INVALID_UIDGID;
}
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
} }
@ -484,7 +539,9 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
__set_bit(FR_ASYNC, &req->flags); __set_bit(FR_ASYNC, &req->flags);
} }
ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args) ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
struct fuse_mount *fm,
struct fuse_args *args)
{ {
struct fuse_conn *fc = fm->fc; struct fuse_conn *fc = fm->fc;
struct fuse_req *req; struct fuse_req *req;
@ -501,7 +558,7 @@ ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
__set_bit(FR_FORCE, &req->flags); __set_bit(FR_FORCE, &req->flags);
} else { } else {
WARN_ON(args->nocreds); WARN_ON(args->nocreds);
req = fuse_get_req(fm, false); req = fuse_get_req(idmap, fm, false);
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
} }
@ -562,7 +619,7 @@ int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
__set_bit(FR_BACKGROUND, &req->flags); __set_bit(FR_BACKGROUND, &req->flags);
} else { } else {
WARN_ON(args->nocreds); WARN_ON(args->nocreds);
req = fuse_get_req(fm, true); req = fuse_get_req(&invalid_mnt_idmap, fm, true);
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
} }
@ -583,9 +640,8 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
{ {
struct fuse_req *req; struct fuse_req *req;
struct fuse_iqueue *fiq = &fm->fc->iq; struct fuse_iqueue *fiq = &fm->fc->iq;
int err = 0;
req = fuse_get_req(fm, false); req = fuse_get_req(&invalid_mnt_idmap, fm, false);
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
@ -594,16 +650,9 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
fuse_args_to_req(req, args); fuse_args_to_req(req, args);
spin_lock(&fiq->lock); fuse_send_one(fiq, req);
if (fiq->connected) {
queue_request_and_unlock(fiq, req);
} else {
err = -ENODEV;
spin_unlock(&fiq->lock);
fuse_put_request(req);
}
return err; return 0;
} }
/* /*
@ -1075,7 +1124,7 @@ __releases(fiq->lock)
return err ? err : reqsize; return err ? err : reqsize;
} }
struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
unsigned int max, unsigned int max,
unsigned int *countp) unsigned int *countp)
{ {
@ -1096,7 +1145,6 @@ struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
return head; return head;
} }
EXPORT_SYMBOL(fuse_dequeue_forget);
static int fuse_read_single_forget(struct fuse_iqueue *fiq, static int fuse_read_single_forget(struct fuse_iqueue *fiq,
struct fuse_copy_state *cs, struct fuse_copy_state *cs,
@ -1111,7 +1159,7 @@ __releases(fiq->lock)
struct fuse_in_header ih = { struct fuse_in_header ih = {
.opcode = FUSE_FORGET, .opcode = FUSE_FORGET,
.nodeid = forget->forget_one.nodeid, .nodeid = forget->forget_one.nodeid,
.unique = fuse_get_unique(fiq), .unique = fuse_get_unique_locked(fiq),
.len = sizeof(ih) + sizeof(arg), .len = sizeof(ih) + sizeof(arg),
}; };
@ -1142,7 +1190,7 @@ __releases(fiq->lock)
struct fuse_batch_forget_in arg = { .count = 0 }; struct fuse_batch_forget_in arg = { .count = 0 };
struct fuse_in_header ih = { struct fuse_in_header ih = {
.opcode = FUSE_BATCH_FORGET, .opcode = FUSE_BATCH_FORGET,
.unique = fuse_get_unique(fiq), .unique = fuse_get_unique_locked(fiq),
.len = sizeof(ih) + sizeof(arg), .len = sizeof(ih) + sizeof(arg),
}; };
@ -1830,7 +1878,7 @@ static void fuse_resend(struct fuse_conn *fc)
} }
/* iq and pq requests are both oldest to newest */ /* iq and pq requests are both oldest to newest */
list_splice(&to_queue, &fiq->pending); list_splice(&to_queue, &fiq->pending);
fiq->ops->wake_pending_and_unlock(fiq); fuse_dev_wake_and_unlock(fiq);
} }
static int fuse_notify_resend(struct fuse_conn *fc) static int fuse_notify_resend(struct fuse_conn *fc)

View File: fs/fuse/dir.c

@ -545,17 +545,21 @@ static u32 fuse_ext_size(size_t size)
/* /*
* This adds just a single supplementary group that matches the parent's group. * This adds just a single supplementary group that matches the parent's group.
*/ */
static int get_create_supp_group(struct inode *dir, struct fuse_in_arg *ext) static int get_create_supp_group(struct mnt_idmap *idmap,
struct inode *dir,
struct fuse_in_arg *ext)
{ {
struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_ext_header *xh; struct fuse_ext_header *xh;
struct fuse_supp_groups *sg; struct fuse_supp_groups *sg;
kgid_t kgid = dir->i_gid; kgid_t kgid = dir->i_gid;
vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
gid_t parent_gid = from_kgid(fc->user_ns, kgid); gid_t parent_gid = from_kgid(fc->user_ns, kgid);
u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0])); u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
if (parent_gid == (gid_t) -1 || gid_eq(kgid, current_fsgid()) || if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
!in_group_p(kgid)) !vfsgid_in_group_p(vfsgid))
return 0; return 0;
xh = extend_arg(ext, sg_len); xh = extend_arg(ext, sg_len);
@ -572,7 +576,8 @@ static int get_create_supp_group(struct inode *dir, struct fuse_in_arg *ext)
return 0; return 0;
} }
static int get_create_ext(struct fuse_args *args, static int get_create_ext(struct mnt_idmap *idmap,
struct fuse_args *args,
struct inode *dir, struct dentry *dentry, struct inode *dir, struct dentry *dentry,
umode_t mode) umode_t mode)
{ {
@ -583,7 +588,7 @@ static int get_create_ext(struct fuse_args *args,
if (fc->init_security) if (fc->init_security)
err = get_security_context(dentry, mode, &ext); err = get_security_context(dentry, mode, &ext);
if (!err && fc->create_supp_group) if (!err && fc->create_supp_group)
err = get_create_supp_group(dir, &ext); err = get_create_supp_group(idmap, dir, &ext);
if (!err && ext.size) { if (!err && ext.size) {
WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args)); WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
@ -609,9 +614,9 @@ static void free_ext_value(struct fuse_args *args)
* If the filesystem doesn't support this, then fall back to separate * If the filesystem doesn't support this, then fall back to separate
* 'mknod' + 'open' requests. * 'mknod' + 'open' requests.
*/ */
static int fuse_create_open(struct inode *dir, struct dentry *entry, static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, unsigned int flags, struct dentry *entry, struct file *file,
umode_t mode, u32 opcode) unsigned int flags, umode_t mode, u32 opcode)
{ {
int err; int err;
struct inode *inode; struct inode *inode;
@ -668,11 +673,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
args.out_args[1].size = sizeof(*outopenp); args.out_args[1].size = sizeof(*outopenp);
args.out_args[1].value = outopenp; args.out_args[1].value = outopenp;
err = get_create_ext(&args, dir, entry, mode); err = get_create_ext(idmap, &args, dir, entry, mode);
if (err) if (err)
goto out_free_ff; goto out_free_ff;
err = fuse_simple_request(fm, &args); err = fuse_simple_idmap_request(idmap, fm, &args);
free_ext_value(&args); free_ext_value(&args);
if (err) if (err)
goto out_free_ff; goto out_free_ff;
@ -729,6 +734,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
umode_t mode) umode_t mode)
{ {
int err; int err;
struct mnt_idmap *idmap = file_mnt_idmap(file);
struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_conn *fc = get_fuse_conn(dir);
struct dentry *res = NULL; struct dentry *res = NULL;
@ -753,7 +759,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
if (fc->no_create) if (fc->no_create)
goto mknod; goto mknod;
err = fuse_create_open(dir, entry, file, flags, mode, FUSE_CREATE); err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_create = 1; fc->no_create = 1;
goto mknod; goto mknod;
@ -764,7 +770,7 @@ out_dput:
return err; return err;
mknod: mknod:
err = fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0); err = fuse_mknod(idmap, dir, entry, mode, 0);
if (err) if (err)
goto out_dput; goto out_dput;
no_open: no_open:
@ -774,9 +780,9 @@ no_open:
/* /*
* Code shared between mknod, mkdir, symlink and link * Code shared between mknod, mkdir, symlink and link
*/ */
static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
struct inode *dir, struct dentry *entry, struct fuse_args *args, struct inode *dir,
umode_t mode) struct dentry *entry, umode_t mode)
{ {
struct fuse_entry_out outarg; struct fuse_entry_out outarg;
struct inode *inode; struct inode *inode;
@ -798,12 +804,12 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
args->out_args[0].value = &outarg; args->out_args[0].value = &outarg;
if (args->opcode != FUSE_LINK) { if (args->opcode != FUSE_LINK) {
err = get_create_ext(args, dir, entry, mode); err = get_create_ext(idmap, args, dir, entry, mode);
if (err) if (err)
goto out_put_forget_req; goto out_put_forget_req;
} }
err = fuse_simple_request(fm, args); err = fuse_simple_idmap_request(idmap, fm, args);
free_ext_value(args); free_ext_value(args);
if (err) if (err)
goto out_put_forget_req; goto out_put_forget_req;
@ -864,13 +870,13 @@ static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name; args.in_args[1].value = entry->d_name.name;
return create_new_entry(fm, &args, dir, entry, mode); return create_new_entry(idmap, fm, &args, dir, entry, mode);
} }
static int fuse_create(struct mnt_idmap *idmap, struct inode *dir, static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *entry, umode_t mode, bool excl) struct dentry *entry, umode_t mode, bool excl)
{ {
return fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0); return fuse_mknod(idmap, dir, entry, mode, 0);
} }
static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir, static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
@ -882,7 +888,8 @@ static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
if (fc->no_tmpfile) if (fc->no_tmpfile)
return -EOPNOTSUPP; return -EOPNOTSUPP;
err = fuse_create_open(dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE); err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
file->f_flags, mode, FUSE_TMPFILE);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_tmpfile = 1; fc->no_tmpfile = 1;
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
@ -909,7 +916,7 @@ static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name; args.in_args[1].value = entry->d_name.name;
return create_new_entry(fm, &args, dir, entry, S_IFDIR); return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
} }
static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir, static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
@ -925,7 +932,7 @@ static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
args.in_args[0].value = entry->d_name.name; args.in_args[0].value = entry->d_name.name;
args.in_args[1].size = len; args.in_args[1].size = len;
args.in_args[1].value = link; args.in_args[1].value = link;
return create_new_entry(fm, &args, dir, entry, S_IFLNK); return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK);
} }
void fuse_flush_time_update(struct inode *inode) void fuse_flush_time_update(struct inode *inode)
@ -1019,7 +1026,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
return err; return err;
} }
static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
struct inode *newdir, struct dentry *newent, struct inode *newdir, struct dentry *newent,
unsigned int flags, int opcode, size_t argsize) unsigned int flags, int opcode, size_t argsize)
{ {
@ -1040,7 +1047,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
args.in_args[1].value = oldent->d_name.name; args.in_args[1].value = oldent->d_name.name;
args.in_args[2].size = newent->d_name.len + 1; args.in_args[2].size = newent->d_name.len + 1;
args.in_args[2].value = newent->d_name.name; args.in_args[2].value = newent->d_name.name;
err = fuse_simple_request(fm, &args); err = fuse_simple_idmap_request(idmap, fm, &args);
if (!err) { if (!err) {
/* ctime changes */ /* ctime changes */
fuse_update_ctime(d_inode(oldent)); fuse_update_ctime(d_inode(oldent));
@ -1086,7 +1093,8 @@ static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
if (fc->no_rename2 || fc->minor < 23) if (fc->no_rename2 || fc->minor < 23)
return -EINVAL; return -EINVAL;
err = fuse_rename_common(olddir, oldent, newdir, newent, flags, err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
olddir, oldent, newdir, newent, flags,
FUSE_RENAME2, FUSE_RENAME2,
sizeof(struct fuse_rename2_in)); sizeof(struct fuse_rename2_in));
if (err == -ENOSYS) { if (err == -ENOSYS) {
@ -1094,7 +1102,7 @@ static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
err = -EINVAL; err = -EINVAL;
} }
} else { } else {
err = fuse_rename_common(olddir, oldent, newdir, newent, 0, err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
FUSE_RENAME, FUSE_RENAME,
sizeof(struct fuse_rename_in)); sizeof(struct fuse_rename_in));
} }
@ -1119,7 +1127,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
args.in_args[1].size = newent->d_name.len + 1; args.in_args[1].size = newent->d_name.len + 1;
args.in_args[1].value = newent->d_name.name; args.in_args[1].value = newent->d_name.name;
err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); err = create_new_entry(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
if (!err) if (!err)
fuse_update_ctime_in_cache(inode); fuse_update_ctime_in_cache(inode);
else if (err == -EINTR) else if (err == -EINTR)
@ -1128,18 +1136,22 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
return err; return err;
} }
static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
struct kstat *stat) struct fuse_attr *attr, struct kstat *stat)
{ {
unsigned int blkbits; unsigned int blkbits;
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
make_kuid(fc->user_ns, attr->uid));
vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
make_kgid(fc->user_ns, attr->gid));
stat->dev = inode->i_sb->s_dev; stat->dev = inode->i_sb->s_dev;
stat->ino = attr->ino; stat->ino = attr->ino;
stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
stat->nlink = attr->nlink; stat->nlink = attr->nlink;
stat->uid = make_kuid(fc->user_ns, attr->uid); stat->uid = vfsuid_into_kuid(vfsuid);
stat->gid = make_kgid(fc->user_ns, attr->gid); stat->gid = vfsgid_into_kgid(vfsgid);
stat->rdev = inode->i_rdev; stat->rdev = inode->i_rdev;
stat->atime.tv_sec = attr->atime; stat->atime.tv_sec = attr->atime;
stat->atime.tv_nsec = attr->atimensec; stat->atime.tv_nsec = attr->atimensec;
@ -1178,8 +1190,8 @@ static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
attr->blksize = sx->blksize; attr->blksize = sx->blksize;
} }
static int fuse_do_statx(struct inode *inode, struct file *file, static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
struct kstat *stat) struct file *file, struct kstat *stat)
{ {
int err; int err;
struct fuse_attr attr; struct fuse_attr attr;
@ -1232,15 +1244,15 @@ static int fuse_do_statx(struct inode *inode, struct file *file,
stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
stat->btime.tv_sec = sx->btime.tv_sec; stat->btime.tv_sec = sx->btime.tv_sec;
stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
fuse_fillattr(inode, &attr, stat); fuse_fillattr(idmap, inode, &attr, stat);
stat->result_mask |= STATX_TYPE; stat->result_mask |= STATX_TYPE;
} }
return 0; return 0;
} }
static int fuse_do_getattr(struct inode *inode, struct kstat *stat, static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
struct file *file) struct kstat *stat, struct file *file)
{ {
int err; int err;
struct fuse_getattr_in inarg; struct fuse_getattr_in inarg;
@ -1279,15 +1291,15 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
ATTR_TIMEOUT(&outarg), ATTR_TIMEOUT(&outarg),
attr_version); attr_version);
if (stat) if (stat)
fuse_fillattr(inode, &outarg.attr, stat); fuse_fillattr(idmap, inode, &outarg.attr, stat);
} }
} }
return err; return err;
} }
static int fuse_update_get_attr(struct inode *inode, struct file *file, static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
struct kstat *stat, u32 request_mask, struct file *file, struct kstat *stat,
unsigned int flags) u32 request_mask, unsigned int flags)
{ {
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
@ -1318,17 +1330,17 @@ retry:
forget_all_cached_acls(inode); forget_all_cached_acls(inode);
/* Try statx if BTIME is requested */ /* Try statx if BTIME is requested */
if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) { if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
err = fuse_do_statx(inode, file, stat); err = fuse_do_statx(idmap, inode, file, stat);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_statx = 1; fc->no_statx = 1;
err = 0; err = 0;
goto retry; goto retry;
} }
} else { } else {
err = fuse_do_getattr(inode, stat, file); err = fuse_do_getattr(idmap, inode, stat, file);
} }
} else if (stat) { } else if (stat) {
generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); generic_fillattr(idmap, request_mask, inode, stat);
stat->mode = fi->orig_i_mode; stat->mode = fi->orig_i_mode;
stat->ino = fi->orig_ino; stat->ino = fi->orig_ino;
if (test_bit(FUSE_I_BTIME, &fi->state)) { if (test_bit(FUSE_I_BTIME, &fi->state)) {
@ -1342,7 +1354,7 @@ retry:
int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
{ {
return fuse_update_get_attr(inode, file, NULL, mask, 0); return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
} }
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
@ -1462,6 +1474,14 @@ static int fuse_access(struct inode *inode, int mask)
BUG_ON(mask & MAY_NOT_BLOCK); BUG_ON(mask & MAY_NOT_BLOCK);
/*
* We should not send FUSE_ACCESS to the userspace
* when idmapped mounts are enabled as for this case
* we have fc->default_permissions = 1 and access
* permission checks are done on the kernel side.
*/
WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
if (fm->fc->no_access) if (fm->fc->no_access)
return 0; return 0;
@ -1486,7 +1506,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
return -ECHILD; return -ECHILD;
forget_all_cached_acls(inode); forget_all_cached_acls(inode);
return fuse_do_getattr(inode, NULL, NULL); return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
} }
/* /*
@ -1534,7 +1554,7 @@ static int fuse_permission(struct mnt_idmap *idmap,
} }
if (fc->default_permissions) { if (fc->default_permissions) {
err = generic_permission(&nop_mnt_idmap, inode, mask); err = generic_permission(idmap, inode, mask);
/* If permission is denied, try to refresh file /* If permission is denied, try to refresh file
attributes. This is also needed, because the root attributes. This is also needed, because the root
@ -1542,7 +1562,7 @@ static int fuse_permission(struct mnt_idmap *idmap,
if (err == -EACCES && !refreshed) { if (err == -EACCES && !refreshed) {
err = fuse_perm_getattr(inode, mask); err = fuse_perm_getattr(inode, mask);
if (!err) if (!err)
err = generic_permission(&nop_mnt_idmap, err = generic_permission(idmap,
inode, mask); inode, mask);
} }
@ -1738,17 +1758,29 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
return true; return true;
} }
static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
struct fuse_setattr_in *arg, bool trust_local_cmtime) struct iattr *iattr, struct fuse_setattr_in *arg,
bool trust_local_cmtime)
{ {
unsigned ivalid = iattr->ia_valid; unsigned ivalid = iattr->ia_valid;
if (ivalid & ATTR_MODE) if (ivalid & ATTR_MODE)
arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
if (ivalid & ATTR_UID)
arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid); if (ivalid & ATTR_UID) {
if (ivalid & ATTR_GID) kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
arg->valid |= FATTR_UID;
arg->uid = from_kuid(fc->user_ns, fsuid);
}
if (ivalid & ATTR_GID) {
kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
arg->valid |= FATTR_GID;
arg->gid = from_kgid(fc->user_ns, fsgid);
}
if (ivalid & ATTR_SIZE) if (ivalid & ATTR_SIZE)
arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
if (ivalid & ATTR_ATIME) { if (ivalid & ATTR_ATIME) {
@ -1868,8 +1900,8 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
* vmtruncate() doesn't allow for this case, so do the rlimit checking * vmtruncate() doesn't allow for this case, so do the rlimit checking
* and the actual truncation by hand. * and the actual truncation by hand.
*/ */
int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct file *file) struct iattr *attr, struct file *file)
{ {
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_mount *fm = get_fuse_mount(inode);
@ -1889,7 +1921,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if (!fc->default_permissions) if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE; attr->ia_valid |= ATTR_FORCE;
err = setattr_prepare(&nop_mnt_idmap, dentry, attr); err = setattr_prepare(idmap, dentry, attr);
if (err) if (err)
return err; return err;
@ -1948,7 +1980,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg)); memset(&outarg, 0, sizeof(outarg));
iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime); iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
if (file) { if (file) {
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
inarg.valid |= FATTR_FH; inarg.valid |= FATTR_FH;
@ -2065,7 +2097,7 @@ static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
* ia_mode calculation may have used stale i_mode. * ia_mode calculation may have used stale i_mode.
* Refresh and recalculate. * Refresh and recalculate.
*/ */
ret = fuse_do_getattr(inode, NULL, file); ret = fuse_do_getattr(idmap, inode, NULL, file);
if (ret) if (ret)
return ret; return ret;
@ -2083,7 +2115,7 @@ static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
if (!attr->ia_valid) if (!attr->ia_valid)
return 0; return 0;
ret = fuse_do_setattr(entry, attr, file); ret = fuse_do_setattr(idmap, entry, attr, file);
if (!ret) { if (!ret) {
/* /*
* If filesystem supports acls it may have updated acl xattrs in * If filesystem supports acls it may have updated acl xattrs in
@ -2122,7 +2154,7 @@ static int fuse_getattr(struct mnt_idmap *idmap,
return -EACCES; return -EACCES;
} }
return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags);
} }
static const struct inode_operations fuse_dir_inode_operations = { static const struct inode_operations fuse_dir_inode_operations = {

View File

@ -448,9 +448,6 @@ static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
/* /*
* Check if any page in a range is under writeback * Check if any page in a range is under writeback
*
* This is currently done by walking the list of writepage requests
* for the inode, which can be pretty inefficient.
*/ */
static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
pgoff_t idx_to) pgoff_t idx_to)
@ -458,6 +455,9 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
bool found; bool found;
if (RB_EMPTY_ROOT(&fi->writepages))
return false;
spin_lock(&fi->lock); spin_lock(&fi->lock);
found = fuse_find_writeback(fi, idx_from, idx_to); found = fuse_find_writeback(fi, idx_from, idx_to);
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
@ -1345,7 +1345,7 @@ static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from
/* shared locks are not allowed with parallel page cache IO */ /* shared locks are not allowed with parallel page cache IO */
if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state)) if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state))
return false; return true;
/* Parallel dio beyond EOF is not supported, at least for now. */ /* Parallel dio beyond EOF is not supported, at least for now. */
if (fuse_io_past_eof(iocb, from)) if (fuse_io_past_eof(iocb, from))
@ -1398,6 +1398,7 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct mnt_idmap *idmap = file_mnt_idmap(file);
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
ssize_t written = 0; ssize_t written = 0;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
@ -1412,7 +1413,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
return err; return err;
if (fc->handle_killpriv_v2 && if (fc->handle_killpriv_v2 &&
setattr_should_drop_suidgid(&nop_mnt_idmap, setattr_should_drop_suidgid(idmap,
file_inode(file))) { file_inode(file))) {
goto writethrough; goto writethrough;
} }
@ -1762,27 +1763,31 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
for (i = 0; i < ap->num_pages; i++) for (i = 0; i < ap->num_pages; i++)
__free_page(ap->pages[i]); __free_page(ap->pages[i]);
if (wpa->ia.ff)
fuse_file_put(wpa->ia.ff, false); fuse_file_put(wpa->ia.ff, false);
kfree(ap->pages); kfree(ap->pages);
kfree(wpa); kfree(wpa);
} }
static void fuse_writepage_finish(struct fuse_mount *fm, static void fuse_writepage_finish_stat(struct inode *inode, struct page *page)
struct fuse_writepage_args *wpa) {
struct backing_dev_info *bdi = inode_to_bdi(inode);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(page, NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
}
static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
{ {
struct fuse_args_pages *ap = &wpa->ia.ap; struct fuse_args_pages *ap = &wpa->ia.ap;
struct inode *inode = wpa->inode; struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
struct backing_dev_info *bdi = inode_to_bdi(inode);
int i; int i;
for (i = 0; i < ap->num_pages; i++) { for (i = 0; i < ap->num_pages; i++)
dec_wb_stat(&bdi->wb, WB_WRITEBACK); fuse_writepage_finish_stat(inode, ap->pages[i]);
dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
}
wake_up(&fi->page_waitq); wake_up(&fi->page_waitq);
} }
@ -1829,19 +1834,14 @@ __acquires(fi->lock)
out_free: out_free:
fi->writectr--; fi->writectr--;
rb_erase(&wpa->writepages_entry, &fi->writepages); rb_erase(&wpa->writepages_entry, &fi->writepages);
fuse_writepage_finish(fm, wpa); fuse_writepage_finish(wpa);
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
/* After rb_erase() aux request list is private */ /* After rb_erase() aux request list is private */
for (aux = wpa->next; aux; aux = next) { for (aux = wpa->next; aux; aux = next) {
struct backing_dev_info *bdi = inode_to_bdi(aux->inode);
next = aux->next; next = aux->next;
aux->next = NULL; aux->next = NULL;
fuse_writepage_finish_stat(aux->inode, aux->ia.ap.pages[0]);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(aux->ia.ap.pages[0], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
fuse_writepage_free(aux); fuse_writepage_free(aux);
} }
@ -1936,7 +1936,6 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
wpa->next = next->next; wpa->next = next->next;
next->next = NULL; next->next = NULL;
next->ia.ff = fuse_file_get(wpa->ia.ff);
tree_insert(&fi->writepages, next); tree_insert(&fi->writepages, next);
/* /*
@ -1965,7 +1964,7 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
fuse_send_writepage(fm, next, inarg->offset + inarg->size); fuse_send_writepage(fm, next, inarg->offset + inarg->size);
} }
fi->writectr--; fi->writectr--;
fuse_writepage_finish(fm, wpa); fuse_writepage_finish(wpa);
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
fuse_writepage_free(wpa); fuse_writepage_free(wpa);
} }
@ -2049,49 +2048,77 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
rcu_read_unlock(); rcu_read_unlock();
} }
static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
struct folio *tmp_folio, uint32_t page_index)
{
struct inode *inode = folio->mapping->host;
struct fuse_args_pages *ap = &wpa->ia.ap;
folio_copy(tmp_folio, folio);
ap->pages[page_index] = &tmp_folio->page;
ap->descs[page_index].offset = 0;
ap->descs[page_index].length = PAGE_SIZE;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_node_page_state(&tmp_folio->page, NR_WRITEBACK_TEMP);
}
static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
struct fuse_file *ff)
{
struct inode *inode = folio->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_writepage_args *wpa;
struct fuse_args_pages *ap;
wpa = fuse_writepage_args_alloc();
if (!wpa)
return NULL;
fuse_writepage_add_to_bucket(fc, wpa);
fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio), 0);
wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
wpa->inode = inode;
wpa->ia.ff = ff;
ap = &wpa->ia.ap;
ap->args.in_pages = true;
ap->args.end = fuse_writepage_end;
return wpa;
}
static int fuse_writepage_locked(struct folio *folio) static int fuse_writepage_locked(struct folio *folio)
{ {
struct address_space *mapping = folio->mapping; struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_writepage_args *wpa; struct fuse_writepage_args *wpa;
struct fuse_args_pages *ap; struct fuse_args_pages *ap;
struct folio *tmp_folio; struct folio *tmp_folio;
struct fuse_file *ff;
int error = -ENOMEM; int error = -ENOMEM;
folio_start_writeback(folio);
wpa = fuse_writepage_args_alloc();
if (!wpa)
goto err;
ap = &wpa->ia.ap;
tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0); tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
if (!tmp_folio) if (!tmp_folio)
goto err_free; goto err;
error = -EIO; error = -EIO;
wpa->ia.ff = fuse_write_file_get(fi); ff = fuse_write_file_get(fi);
if (!wpa->ia.ff) if (!ff)
goto err_nofile; goto err_nofile;
fuse_writepage_add_to_bucket(fc, wpa); wpa = fuse_writepage_args_setup(folio, ff);
fuse_write_args_fill(&wpa->ia, wpa->ia.ff, folio_pos(folio), 0); error = -ENOMEM;
if (!wpa)
goto err_writepage_args;
folio_copy(tmp_folio, folio); ap = &wpa->ia.ap;
wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
wpa->next = NULL;
ap->args.in_pages = true;
ap->num_pages = 1; ap->num_pages = 1;
ap->pages[0] = &tmp_folio->page;
ap->descs[0].offset = 0;
ap->descs[0].length = PAGE_SIZE;
ap->args.end = fuse_writepage_end;
wpa->inode = inode;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); folio_start_writeback(folio);
node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP); fuse_writepage_args_page_fill(wpa, folio, tmp_folio, 0);
spin_lock(&fi->lock); spin_lock(&fi->lock);
tree_insert(&fi->writepages, wpa); tree_insert(&fi->writepages, wpa);
@ -2103,13 +2130,12 @@ static int fuse_writepage_locked(struct folio *folio)
return 0; return 0;
err_writepage_args:
fuse_file_put(ff, false);
err_nofile: err_nofile:
folio_put(tmp_folio); folio_put(tmp_folio);
err_free:
kfree(wpa);
err: err:
mapping_set_error(folio->mapping, error); mapping_set_error(folio->mapping, error);
folio_end_writeback(folio);
return error; return error;
} }
@ -2155,7 +2181,6 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
int num_pages = wpa->ia.ap.num_pages; int num_pages = wpa->ia.ap.num_pages;
int i; int i;
wpa->ia.ff = fuse_file_get(data->ff);
spin_lock(&fi->lock); spin_lock(&fi->lock);
list_add_tail(&wpa->queue_entry, &fi->queued_writes); list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode); fuse_flush_writepages(inode);
@ -2210,11 +2235,7 @@ static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
if (tmp) { if (tmp) {
struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode); fuse_writepage_finish_stat(new_wpa->inode, new_ap->pages[0]);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
fuse_writepage_free(new_wpa); fuse_writepage_free(new_wpa);
} }
@ -2264,24 +2285,17 @@ static int fuse_writepages_fill(struct folio *folio,
struct inode *inode = data->inode; struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct page *tmp_page; struct folio *tmp_folio;
int err; int err;
if (!data->ff) {
err = -EIO;
data->ff = fuse_write_file_get(fi);
if (!data->ff)
goto out_unlock;
}
if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) { if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) {
fuse_writepages_send(data); fuse_writepages_send(data);
data->wpa = NULL; data->wpa = NULL;
} }
err = -ENOMEM; err = -ENOMEM;
tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
if (!tmp_page) if (!tmp_folio)
goto out_unlock; goto out_unlock;
/* /*
@ -2299,35 +2313,20 @@ static int fuse_writepages_fill(struct folio *folio,
*/ */
if (data->wpa == NULL) { if (data->wpa == NULL) {
err = -ENOMEM; err = -ENOMEM;
wpa = fuse_writepage_args_alloc(); wpa = fuse_writepage_args_setup(folio, data->ff);
if (!wpa) { if (!wpa) {
__free_page(tmp_page); folio_put(tmp_folio);
goto out_unlock; goto out_unlock;
} }
fuse_writepage_add_to_bucket(fc, wpa); fuse_file_get(wpa->ia.ff);
data->max_pages = 1; data->max_pages = 1;
ap = &wpa->ia.ap; ap = &wpa->ia.ap;
fuse_write_args_fill(&wpa->ia, data->ff, folio_pos(folio), 0);
wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
wpa->next = NULL;
ap->args.in_pages = true;
ap->args.end = fuse_writepage_end;
ap->num_pages = 0;
wpa->inode = inode;
} }
folio_start_writeback(folio); folio_start_writeback(folio);
copy_highpage(tmp_page, &folio->page); fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_pages);
ap->pages[ap->num_pages] = tmp_page;
ap->descs[ap->num_pages].offset = 0;
ap->descs[ap->num_pages].length = PAGE_SIZE;
data->orig_pages[ap->num_pages] = &folio->page; data->orig_pages[ap->num_pages] = &folio->page;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
err = 0; err = 0;
if (data->wpa) { if (data->wpa) {
/* /*
@ -2352,13 +2351,13 @@ static int fuse_writepages(struct address_space *mapping,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_wb_data data; struct fuse_fill_wb_data data;
int err; int err;
err = -EIO;
if (fuse_is_bad(inode)) if (fuse_is_bad(inode))
goto out; return -EIO;
if (wbc->sync_mode == WB_SYNC_NONE && if (wbc->sync_mode == WB_SYNC_NONE &&
fc->num_background >= fc->congestion_threshold) fc->num_background >= fc->congestion_threshold)
@ -2366,7 +2365,9 @@ static int fuse_writepages(struct address_space *mapping,
data.inode = inode; data.inode = inode;
data.wpa = NULL; data.wpa = NULL;
data.ff = NULL; data.ff = fuse_write_file_get(fi);
if (!data.ff)
return -EIO;
err = -ENOMEM; err = -ENOMEM;
data.orig_pages = kcalloc(fc->max_pages, data.orig_pages = kcalloc(fc->max_pages,
@ -2380,11 +2381,10 @@ static int fuse_writepages(struct address_space *mapping,
WARN_ON(!data.wpa->ia.ap.num_pages); WARN_ON(!data.wpa->ia.ap.num_pages);
fuse_writepages_send(&data); fuse_writepages_send(&data);
} }
if (data.ff)
fuse_file_put(data.ff, false);
kfree(data.orig_pages); kfree(data.orig_pages);
out: out:
fuse_file_put(data.ff, false);
return err; return err;
} }
@ -2973,7 +2973,7 @@ static void fuse_do_truncate(struct file *file)
attr.ia_file = file; attr.ia_file = file;
attr.ia_valid |= ATTR_FILE; attr.ia_valid |= ATTR_FILE;
fuse_do_setattr(file_dentry(file), &attr, file); fuse_do_setattr(file_mnt_idmap(file), file_dentry(file), &attr, file);
} }
static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off) static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)

View File

@ -449,22 +449,19 @@ struct fuse_iqueue;
*/ */
struct fuse_iqueue_ops { struct fuse_iqueue_ops {
/** /**
* Signal that a forget has been queued * Send one forget
*/ */
void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link);
__releases(fiq->lock);
/** /**
* Signal that an INTERRUPT request has been queued * Send interrupt for request
*/ */
void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req);
__releases(fiq->lock);
/** /**
* Signal that a request has been queued * Send one request
*/ */
void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req);
__releases(fiq->lock);
/** /**
* Clean up when fuse_iqueue is destroyed * Clean up when fuse_iqueue is destroyed
@ -869,7 +866,7 @@ struct fuse_conn {
/** Negotiated minor version */ /** Negotiated minor version */
unsigned minor; unsigned minor;
/** Entry on the fuse_mount_list */ /** Entry on the fuse_conn_list */
struct list_head entry; struct list_head entry;
/** Device ID from the root super block */ /** Device ID from the root super block */
@ -1053,10 +1050,6 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
struct fuse_forget_link *fuse_alloc_forget(void); struct fuse_forget_link *fuse_alloc_forget(void);
struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
unsigned int max,
unsigned int *countp);
/* /*
* Initialize READ or READDIR request * Initialize READ or READDIR request
*/ */
@ -1154,7 +1147,22 @@ void __exit fuse_ctl_cleanup(void);
/** /**
* Simple request sending that does request allocation and freeing * Simple request sending that does request allocation and freeing
*/ */
ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args); ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
struct fuse_mount *fm,
struct fuse_args *args);
static inline ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
{
return __fuse_simple_request(&invalid_mnt_idmap, fm, args);
}
static inline ssize_t fuse_simple_idmap_request(struct mnt_idmap *idmap,
struct fuse_mount *fm,
struct fuse_args *args)
{
return __fuse_simple_request(idmap, fm, args);
}
int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
gfp_t gfp_flags); gfp_t gfp_flags);
@ -1330,8 +1338,8 @@ bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written);
int fuse_flush_times(struct inode *inode, struct fuse_file *ff); int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct file *file); struct iattr *attr, struct file *file);
void fuse_set_initialized(struct fuse_conn *fc); void fuse_set_initialized(struct fuse_conn *fc);

132
fs/fuse/fuse_trace.h Normal file
View File

@ -0,0 +1,132 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM fuse
#if !defined(_TRACE_FUSE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_FUSE_H
#include <linux/tracepoint.h>
/*
 * Table of request opcodes paired with their printable names.
 *
 * Every entry is wrapped in EM(), except the final one which uses EMe(),
 * so the same list can be expanded more than once with different
 * definitions of EM/EMe (see the #undef/#define pairs that follow).
 */
#define OPCODES \
EM( FUSE_LOOKUP, "FUSE_LOOKUP") \
EM( FUSE_FORGET, "FUSE_FORGET") \
EM( FUSE_GETATTR, "FUSE_GETATTR") \
EM( FUSE_SETATTR, "FUSE_SETATTR") \
EM( FUSE_READLINK, "FUSE_READLINK") \
EM( FUSE_SYMLINK, "FUSE_SYMLINK") \
EM( FUSE_MKNOD, "FUSE_MKNOD") \
EM( FUSE_MKDIR, "FUSE_MKDIR") \
EM( FUSE_UNLINK, "FUSE_UNLINK") \
EM( FUSE_RMDIR, "FUSE_RMDIR") \
EM( FUSE_RENAME, "FUSE_RENAME") \
EM( FUSE_LINK, "FUSE_LINK") \
EM( FUSE_OPEN, "FUSE_OPEN") \
EM( FUSE_READ, "FUSE_READ") \
EM( FUSE_WRITE, "FUSE_WRITE") \
EM( FUSE_STATFS, "FUSE_STATFS") \
EM( FUSE_RELEASE, "FUSE_RELEASE") \
EM( FUSE_FSYNC, "FUSE_FSYNC") \
EM( FUSE_SETXATTR, "FUSE_SETXATTR") \
EM( FUSE_GETXATTR, "FUSE_GETXATTR") \
EM( FUSE_LISTXATTR, "FUSE_LISTXATTR") \
EM( FUSE_REMOVEXATTR, "FUSE_REMOVEXATTR") \
EM( FUSE_FLUSH, "FUSE_FLUSH") \
EM( FUSE_INIT, "FUSE_INIT") \
EM( FUSE_OPENDIR, "FUSE_OPENDIR") \
EM( FUSE_READDIR, "FUSE_READDIR") \
EM( FUSE_RELEASEDIR, "FUSE_RELEASEDIR") \
EM( FUSE_FSYNCDIR, "FUSE_FSYNCDIR") \
EM( FUSE_GETLK, "FUSE_GETLK") \
EM( FUSE_SETLK, "FUSE_SETLK") \
EM( FUSE_SETLKW, "FUSE_SETLKW") \
EM( FUSE_ACCESS, "FUSE_ACCESS") \
EM( FUSE_CREATE, "FUSE_CREATE") \
EM( FUSE_INTERRUPT, "FUSE_INTERRUPT") \
EM( FUSE_BMAP, "FUSE_BMAP") \
EM( FUSE_DESTROY, "FUSE_DESTROY") \
EM( FUSE_IOCTL, "FUSE_IOCTL") \
EM( FUSE_POLL, "FUSE_POLL") \
EM( FUSE_NOTIFY_REPLY, "FUSE_NOTIFY_REPLY") \
EM( FUSE_BATCH_FORGET, "FUSE_BATCH_FORGET") \
EM( FUSE_FALLOCATE, "FUSE_FALLOCATE") \
EM( FUSE_READDIRPLUS, "FUSE_READDIRPLUS") \
EM( FUSE_RENAME2, "FUSE_RENAME2") \
EM( FUSE_LSEEK, "FUSE_LSEEK") \
EM( FUSE_COPY_FILE_RANGE, "FUSE_COPY_FILE_RANGE") \
EM( FUSE_SETUPMAPPING, "FUSE_SETUPMAPPING") \
EM( FUSE_REMOVEMAPPING, "FUSE_REMOVEMAPPING") \
EM( FUSE_SYNCFS, "FUSE_SYNCFS") \
EM( FUSE_TMPFILE, "FUSE_TMPFILE") \
EM( FUSE_STATX, "FUSE_STATX") \
EMe(CUSE_INIT, "CUSE_INIT")
/*
* This will turn the above table into TRACE_DEFINE_ENUM() for each of the
* entries.
*/
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
OPCODES
/* Now we redefine it with the table that __print_symbolic needs. */
#undef EM
#undef EMe
#define EM(a, b) {a, b},
#define EMe(a, b) {a, b}
/*
 * fuse_request_send - trace event describing the input side of a request.
 *
 * Takes the fuse_req and records:
 *   connection - req->fm->fc->dev, stored as a dev_t
 *   unique     - req->in.h.unique
 *   opcode     - req->in.h.opcode
 *   len        - req->in.h.len
 *
 * The opcode is printed twice: as a raw number and as the symbolic name
 * looked up in the OPCODES table via __print_symbolic().
 *
 * NOTE(review): presumably emitted when a request is handed to the
 * transport; the call sites are not visible here -- confirm.
 */
TRACE_EVENT(fuse_request_send,
TP_PROTO(const struct fuse_req *req),
TP_ARGS(req),
TP_STRUCT__entry(
__field(dev_t, connection)
__field(uint64_t, unique)
__field(enum fuse_opcode, opcode)
__field(uint32_t, len)
),
TP_fast_assign(
__entry->connection = req->fm->fc->dev;
__entry->unique = req->in.h.unique;
__entry->opcode = req->in.h.opcode;
__entry->len = req->in.h.len;
),
TP_printk("connection %u req %llu opcode %u (%s) len %u ",
__entry->connection, __entry->unique, __entry->opcode,
__print_symbolic(__entry->opcode, OPCODES), __entry->len)
);
/*
 * fuse_request_end - trace event describing the output side of a request.
 *
 * Takes the fuse_req and records:
 *   connection - req->fm->fc->dev, stored as a dev_t
 *   unique     - req->in.h.unique (the id taken from the input header)
 *   len        - req->out.h.len
 *   error      - req->out.h.error
 *
 * NOTE(review): presumably emitted once the reply for the request is
 * available; the call sites are not visible here -- confirm.
 */
TRACE_EVENT(fuse_request_end,
TP_PROTO(const struct fuse_req *req),
TP_ARGS(req),
TP_STRUCT__entry(
__field(dev_t, connection)
__field(uint64_t, unique)
__field(uint32_t, len)
__field(int32_t, error)
),
TP_fast_assign(
__entry->connection = req->fm->fc->dev;
__entry->unique = req->in.h.unique;
__entry->len = req->out.h.len;
__entry->error = req->out.h.error;
),
TP_printk("connection %u req %llu len %u error %d", __entry->connection,
__entry->unique, __entry->len, __entry->error)
);
#endif /* _TRACE_FUSE_H */
/*
 * The lines below sit after the include guard's #endif, so they are
 * processed on every inclusion of this header.
 *
 * NOTE(review): <trace/define_trace.h> presumably uses TRACE_INCLUDE_PATH
 * and TRACE_INCLUDE_FILE to locate and re-read this header -- confirm
 * against the kernel tracepoint documentation.
 */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE fuse_trace
#include <trace/define_trace.h>

View File

@ -1348,6 +1348,12 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
} }
if (flags & FUSE_NO_EXPORT_SUPPORT) if (flags & FUSE_NO_EXPORT_SUPPORT)
fm->sb->s_export_op = &fuse_export_fid_operations; fm->sb->s_export_op = &fuse_export_fid_operations;
if (flags & FUSE_ALLOW_IDMAP) {
if (fc->default_permissions)
fm->sb->s_iflags &= ~SB_I_NOIDMAP;
else
ok = false;
}
} else { } else {
ra_pages = fc->max_read / PAGE_SIZE; ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1; fc->no_lock = 1;
@ -1395,7 +1401,7 @@ void fuse_send_init(struct fuse_mount *fm)
FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND; FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP;
#ifdef CONFIG_FUSE_DAX #ifdef CONFIG_FUSE_DAX
if (fm->fc->dax) if (fm->fc->dax)
flags |= FUSE_MAP_ALIGNMENT; flags |= FUSE_MAP_ALIGNMENT;
@ -1572,6 +1578,7 @@ static void fuse_sb_defaults(struct super_block *sb)
sb->s_time_gran = 1; sb->s_time_gran = 1;
sb->s_export_op = &fuse_export_operations; sb->s_export_op = &fuse_export_operations;
sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
sb->s_iflags |= SB_I_NOIDMAP;
if (sb->s_user_ns != &init_user_ns) if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
@ -1984,7 +1991,7 @@ static void fuse_kill_sb_anon(struct super_block *sb)
static struct file_system_type fuse_fs_type = { static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.name = "fuse", .name = "fuse",
.fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
.init_fs_context = fuse_init_fs_context, .init_fs_context = fuse_init_fs_context,
.parameters = fuse_fs_parameters, .parameters = fuse_fs_parameters,
.kill_sb = fuse_kill_sb_anon, .kill_sb = fuse_kill_sb_anon,
@ -2005,7 +2012,7 @@ static struct file_system_type fuseblk_fs_type = {
.init_fs_context = fuse_init_fs_context, .init_fs_context = fuse_init_fs_context,
.parameters = fuse_fs_parameters, .parameters = fuse_fs_parameters,
.kill_sb = fuse_kill_sb_blk, .kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP,
}; };
MODULE_ALIAS_FS("fuseblk"); MODULE_ALIAS_FS("fuseblk");

View File

@ -228,16 +228,13 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
if (map->flags || map->padding) if (map->flags || map->padding)
goto out; goto out;
file = fget(map->fd); file = fget_raw(map->fd);
res = -EBADF; res = -EBADF;
if (!file) if (!file)
goto out; goto out;
res = -EOPNOTSUPP;
if (!file->f_op->read_iter || !file->f_op->write_iter)
goto out_fput;
backing_sb = file_inode(file)->i_sb; backing_sb = file_inode(file)->i_sb;
pr_info("%s: %x:%pD %i\n", __func__, backing_sb->s_dev, file, backing_sb->s_stack_depth);
res = -ELOOP; res = -ELOOP;
if (backing_sb->s_stack_depth >= fc->max_stack_depth) if (backing_sb->s_stack_depth >= fc->max_stack_depth)
goto out_fput; goto out_fput;

View File

@ -1091,22 +1091,13 @@ static struct virtio_driver virtio_fs_driver = {
#endif #endif
}; };
static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
__releases(fiq->lock)
{ {
struct fuse_forget_link *link;
struct virtio_fs_forget *forget; struct virtio_fs_forget *forget;
struct virtio_fs_forget_req *req; struct virtio_fs_forget_req *req;
struct virtio_fs *fs; struct virtio_fs *fs = fiq->priv;
struct virtio_fs_vq *fsvq; struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
u64 unique; u64 unique = fuse_get_unique(fiq);
link = fuse_dequeue_forget(fiq, 1, NULL);
unique = fuse_get_unique(fiq);
fs = fiq->priv;
fsvq = &fs->vqs[VQ_HIPRIO];
spin_unlock(&fiq->lock);
/* Allocate a buffer for the request */ /* Allocate a buffer for the request */
forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
@ -1126,8 +1117,7 @@ __releases(fiq->lock)
kfree(link); kfree(link);
} }
static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
__releases(fiq->lock)
{ {
/* /*
* TODO interrupts. * TODO interrupts.
@ -1136,7 +1126,6 @@ __releases(fiq->lock)
* Exceptions are blocking lock operations; for example fcntl(F_SETLKW) * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
* with shared lock between host and guest. * with shared lock between host and guest.
*/ */
spin_unlock(&fiq->lock);
} }
/* Count number of scatter-gather elements required */ /* Count number of scatter-gather elements required */
@ -1341,21 +1330,17 @@ out:
return ret; return ret;
} }
static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
__releases(fiq->lock)
{ {
unsigned int queue_id; unsigned int queue_id;
struct virtio_fs *fs; struct virtio_fs *fs;
struct fuse_req *req;
struct virtio_fs_vq *fsvq; struct virtio_fs_vq *fsvq;
int ret; int ret;
WARN_ON(list_empty(&fiq->pending)); if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
req = list_last_entry(&fiq->pending, struct fuse_req, list); req->in.h.unique = fuse_get_unique(fiq);
clear_bit(FR_PENDING, &req->flags); clear_bit(FR_PENDING, &req->flags);
list_del_init(&req->list);
WARN_ON(!list_empty(&fiq->pending));
spin_unlock(&fiq->lock);
fs = fiq->priv; fs = fiq->priv;
queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()]; queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];
@ -1393,9 +1378,9 @@ __releases(fiq->lock)
} }
static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
.wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, .send_forget = virtio_fs_send_forget,
.wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, .send_interrupt = virtio_fs_send_interrupt,
.wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, .send_req = virtio_fs_send_req,
.release = virtio_fs_fiq_release, .release = virtio_fs_fiq_release,
}; };
@ -1628,6 +1613,7 @@ static struct file_system_type virtio_fs_type = {
.name = "virtiofs", .name = "virtiofs",
.init_fs_context = virtio_fs_init_fs_context, .init_fs_context = virtio_fs_init_fs_context,
.kill_sb = virtio_kill_sb, .kill_sb = virtio_kill_sb,
.fs_flags = FS_ALLOW_IDMAP,
}; };
static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)

View File

@ -32,6 +32,15 @@ struct mnt_idmap nop_mnt_idmap = {
}; };
EXPORT_SYMBOL_GPL(nop_mnt_idmap); EXPORT_SYMBOL_GPL(nop_mnt_idmap);
/*
* Carries the invalid idmapping of a full 0-4294967295 {g,u}id range.
* This means that all {g,u}ids are mapped to INVALID_VFS{G,U}ID.
*/
struct mnt_idmap invalid_mnt_idmap = {
.count = REFCOUNT_INIT(1),
};
EXPORT_SYMBOL_GPL(invalid_mnt_idmap);
/** /**
* initial_idmapping - check whether this is the initial mapping * initial_idmapping - check whether this is the initial mapping
* @ns: idmapping to check * @ns: idmapping to check
@ -75,6 +84,8 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
if (idmap == &nop_mnt_idmap) if (idmap == &nop_mnt_idmap)
return VFSUIDT_INIT(kuid); return VFSUIDT_INIT(kuid);
if (idmap == &invalid_mnt_idmap)
return INVALID_VFSUID;
if (initial_idmapping(fs_userns)) if (initial_idmapping(fs_userns))
uid = __kuid_val(kuid); uid = __kuid_val(kuid);
else else
@ -112,6 +123,8 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
if (idmap == &nop_mnt_idmap) if (idmap == &nop_mnt_idmap)
return VFSGIDT_INIT(kgid); return VFSGIDT_INIT(kgid);
if (idmap == &invalid_mnt_idmap)
return INVALID_VFSGID;
if (initial_idmapping(fs_userns)) if (initial_idmapping(fs_userns))
gid = __kgid_val(kgid); gid = __kgid_val(kgid);
else else
@ -140,6 +153,8 @@ kuid_t from_vfsuid(struct mnt_idmap *idmap,
if (idmap == &nop_mnt_idmap) if (idmap == &nop_mnt_idmap)
return AS_KUIDT(vfsuid); return AS_KUIDT(vfsuid);
if (idmap == &invalid_mnt_idmap)
return INVALID_UID;
uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid)); uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid));
if (uid == (uid_t)-1) if (uid == (uid_t)-1)
return INVALID_UID; return INVALID_UID;
@ -167,6 +182,8 @@ kgid_t from_vfsgid(struct mnt_idmap *idmap,
if (idmap == &nop_mnt_idmap) if (idmap == &nop_mnt_idmap)
return AS_KGIDT(vfsgid); return AS_KGIDT(vfsgid);
if (idmap == &invalid_mnt_idmap)
return INVALID_GID;
gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid)); gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid));
if (gid == (gid_t)-1) if (gid == (gid_t)-1)
return INVALID_GID; return INVALID_GID;
@ -296,7 +313,7 @@ struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
*/ */
struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap)
{ {
if (idmap != &nop_mnt_idmap) if (idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap)
refcount_inc(&idmap->count); refcount_inc(&idmap->count);
return idmap; return idmap;
@ -312,7 +329,8 @@ EXPORT_SYMBOL_GPL(mnt_idmap_get);
*/ */
void mnt_idmap_put(struct mnt_idmap *idmap) void mnt_idmap_put(struct mnt_idmap *idmap)
{ {
if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) if (idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap &&
refcount_dec_and_test(&idmap->count))
free_mnt_idmap(idmap); free_mnt_idmap(idmap);
} }
EXPORT_SYMBOL_GPL(mnt_idmap_put); EXPORT_SYMBOL_GPL(mnt_idmap_put);

View File

@ -4471,6 +4471,10 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
return -EINVAL; return -EINVAL;
/* The filesystem has turned off idmapped mounts. */
if (m->mnt_sb->s_iflags & SB_I_NOIDMAP)
return -EINVAL;
/* We're not controlling the superblock. */ /* We're not controlling the superblock. */
if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
return -EPERM; return -EPERM;

View File

@ -1229,6 +1229,7 @@ extern int send_sigurg(struct file *file);
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */
/* Possible states of 'frozen' field */ /* Possible states of 'frozen' field */
enum { enum {

View File

@ -9,6 +9,7 @@ struct mnt_idmap;
struct user_namespace; struct user_namespace;
extern struct mnt_idmap nop_mnt_idmap; extern struct mnt_idmap nop_mnt_idmap;
extern struct mnt_idmap invalid_mnt_idmap;
extern struct user_namespace init_user_ns; extern struct user_namespace init_user_ns;
typedef struct { typedef struct {

View File

@ -217,6 +217,9 @@
* - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag
* - add FUSE_NO_EXPORT_SUPPORT init flag * - add FUSE_NO_EXPORT_SUPPORT init flag
* - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag
*
* 7.41
* - add FUSE_ALLOW_IDMAP
*/ */
#ifndef _LINUX_FUSE_H #ifndef _LINUX_FUSE_H
@ -252,7 +255,7 @@
#define FUSE_KERNEL_VERSION 7 #define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */ /** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 40 #define FUSE_KERNEL_MINOR_VERSION 41
/** The node ID of the root inode */ /** The node ID of the root inode */
#define FUSE_ROOT_ID 1 #define FUSE_ROOT_ID 1
@ -421,6 +424,7 @@ struct fuse_file_lock {
* FUSE_NO_EXPORT_SUPPORT: explicitly disable export support * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support
* FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
* of the request ID indicates resend requests * of the request ID indicates resend requests
* FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
*/ */
#define FUSE_ASYNC_READ (1 << 0) #define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_POSIX_LOCKS (1 << 1)
@ -466,6 +470,7 @@ struct fuse_file_lock {
/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP
#define FUSE_ALLOW_IDMAP (1ULL << 40)
/** /**
* CUSE INIT request/reply flags * CUSE INIT request/reply flags
@ -984,6 +989,21 @@ struct fuse_fallocate_in {
*/ */
#define FUSE_UNIQUE_RESEND (1ULL << 63) #define FUSE_UNIQUE_RESEND (1ULL << 63)
/**
* The kernel sets the
* (struct fuse_in_header).{uid,gid} fields to this value
* when both of the following hold:
* - fuse daemon enabled FUSE_ALLOW_IDMAP
* - idmapping information is not available and uid/gid
* can not be mapped in accordance with an idmapping.
*
* Note: idmapping information is always available
* for inode creation operations such as:
* FUSE_MKNOD, FUSE_SYMLINK, FUSE_MKDIR, FUSE_TMPFILE,
* FUSE_CREATE and FUSE_RENAME2 (with RENAME_WHITEOUT).
*/
#define FUSE_INVALID_UIDGID ((uint32_t)(-1))
struct fuse_in_header { struct fuse_in_header {
uint32_t len; uint32_t len;
uint32_t opcode; uint32_t opcode;