forked from Minki/linux
7d6beb71da
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCYCegywAKCRCRxhvAZXjc
ouJ6AQDlf+7jCQlQdeKKoN9QDFfMzG1ooemat36EpRRTONaGuAD8D9A4sUsG4+5f
4IU5Lj9oY4DEmF8HenbWK2ZHsesL2Qg=
=yPaw
-----END PGP SIGNATURE-----
Merge tag 'idmapped-mounts-v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux
Pull idmapped mounts from Christian Brauner:
"This introduces idmapped mounts which has been in the making for some
time. Simply put, different mounts can expose the same file or
directory with different ownership. This initial implementation comes
with ports for fat, ext4 and with Christoph's port for xfs with more
filesystems being actively worked on by independent people and
maintainers.
Idmapping mounts handle a wide range of long standing use-cases. Here
are just a few:
- Idmapped mounts make it possible to easily share files between
multiple users or multiple machines especially in complex
scenarios. For example, idmapped mounts will be used in the
implementation of portable home directories in
systemd-homed.service(8) where they allow users to move their home
directory to an external storage device and use it on multiple
computers where they are assigned different uids and gids. This
effectively makes it possible to assign random uids and gids at
login time.
- It is possible to share files from the host with unprivileged
containers without having to change ownership permanently through
chown(2).
- It is possible to idmap a container's rootfs without having to
mangle every file. For example, Chromebooks use it to share the
user's Download folder with their unprivileged containers in their
Linux subsystem.
- It is possible to share files between containers with
non-overlapping idmappings.
- Filesystems that lack a proper concept of ownership such as fat can
use idmapped mounts to implement discretionary access (DAC)
permission checking.
- They allow users to efficiently change ownership on a per-mount
basis without having to (recursively) chown(2) all files. In
contrast to chown(2), changing ownership of large sets of files is
instantaneous with idmapped mounts. This is especially useful when
ownership of a whole root filesystem of a virtual machine or
container is changed. With idmapped mounts a single
mount_setattr syscall will be sufficient to change the ownership of
all files.
- Idmapped mounts always take the current ownership into account as
idmappings specify what a given uid or gid is supposed to be mapped
to. This contrasts with the chown(2) syscall which cannot by itself
take the current ownership of the files it changes into account. It
simply changes the ownership to the specified uid and gid. This is
especially problematic when recursively chown(2)ing a large set of
files, which is common with the aforementioned portable home
directory and container and vm scenarios.
- Idmapped mounts allow to change ownership locally, restricting it
to specific mounts, and temporarily as the ownership changes only
apply as long as the mount exists.
Several userspace projects have either already put up patches and
pull-requests for this feature or will do so should you decide to pull
this:
- systemd: In a wide variety of scenarios but especially right away
in their implementation of portable home directories.
https://systemd.io/HOME_DIRECTORY/
- container runtimes: containerd, runC, LXD: To share data between
host and unprivileged containers, unprivileged and privileged
containers, etc. The pull request for idmapped mounts support in
containerd, the default Kubernetes runtime is already up for quite
a while now: https://github.com/containerd/containerd/pull/4734
- The virtio-fs developers and several users have expressed interest
in using this feature with virtual machines once virtio-fs is
ported.
- ChromeOS: Sharing host-directories with unprivileged containers.
I've tightly synced with all those projects and all of those listed
here have also expressed their need/desire for this feature on the
mailing list. For more info on how people use this there's a bunch of
talks about this too. Here's just two recent ones:
https://www.cncf.io/wp-content/uploads/2020/12/Rootless-Containers-in-Gitpod.pdf
https://fosdem.org/2021/schedule/event/containers_idmap/
This comes with an extensive xfstests suite covering both ext4 and
xfs:
https://git.kernel.org/brauner/xfstests-dev/h/idmapped_mounts
It covers truncation, creation, opening, xattrs, vfscaps, setid
execution, setgid inheritance and more both with idmapped and
non-idmapped mounts. It already helped to discover an unrelated xfs
setgid inheritance bug which has since been fixed in mainline. It will
be sent for inclusion with the xfstests project should you decide to
merge this.
In order to support per-mount idmappings vfsmounts are marked with
user namespaces. The idmapping of the user namespace will be used to
map the ids of vfs objects when they are accessed through that mount.
By default all vfsmounts are marked with the initial user namespace.
The initial user namespace is used to indicate that a mount is not
idmapped. All operations behave as before and this is verified in the
testsuite.
Based on prior discussions we want to attach the whole user namespace
and not just a dedicated idmapping struct. This allows us to reuse all
the helpers that already exist for dealing with idmappings instead of
introducing a whole new range of helpers. In addition, if we decide in
the future that we are confident enough to enable unprivileged users
to setup idmapped mounts the permission checking can take into account
whether the caller is privileged in the user namespace the mount is
currently marked with.
The user namespace the mount will be marked with can be specified by
passing a file descriptor referring to the user namespace as an
argument to the new mount_setattr() syscall together with the new
MOUNT_ATTR_IDMAP flag. The system call follows the openat2() pattern
of extensibility.
The following conditions must be met in order to create an idmapped
mount:
- The caller must currently have the CAP_SYS_ADMIN capability in the
user namespace the underlying filesystem has been mounted in.
- The underlying filesystem must support idmapped mounts.
- The mount must not already be idmapped. This also implies that the
idmapping of a mount cannot be altered once it has been idmapped.
- The mount must be a detached/anonymous mount, i.e. it must have
been created by calling open_tree() with the OPEN_TREE_CLONE flag
and it must not already have been visible in the filesystem.
The last two points guarantee easier semantics for userspace and the
kernel and make the implementation significantly simpler.
By default vfsmounts are marked with the initial user namespace and no
behavioral or performance changes are observed.
The manpage with a detailed description can be found here:
1d7b902e28
In order to support idmapped mounts, filesystems need to be changed
and mark themselves with the FS_ALLOW_IDMAP flag in fs_flags. The
patches to convert individual filesystem are not very large or
complicated overall as can be seen from the included fat, ext4, and
xfs ports. Patches for other filesystems are actively worked on and
will be sent out separately. The xfstests suite can be used to verify
that a port has been done correctly.
The mount_setattr() syscall is motivated independent of the idmapped
mounts patches and it's been around since July 2019. One of the most
valuable features of the new mount api is the ability to perform
mounts based on file descriptors only.
Together with the lookup restrictions available in the openat2()
RESOLVE_* flag namespace which we added in v5.6 this is the first time
we are close to hardened and race-free (e.g. symlinks) mounting and
path resolution.
While userspace has started porting to the new mount api to mount
proper filesystems and create new bind-mounts it is currently not
possible to change mount options of an already existing bind mount in
the new mount api since the mount_setattr() syscall is missing.
With the addition of the mount_setattr() syscall we remove this last
restriction and userspace can now fully port to the new mount api,
covering every use-case the old mount api could. We also add the
crucial ability to recursively change mount options for a whole mount
tree, both removing and adding mount options at the same time. This
syscall has been requested multiple times by various people and
projects.
There is a simple tool available at
https://github.com/brauner/mount-idmapped
that allows to create idmapped mounts so people can play with this
patch series. I'll add support for the regular mount binary should you
decide to pull this in the following weeks:
Here's an example to a simple idmapped mount of another user's home
directory:
u1001@f2-vm:/$ sudo ./mount --idmap both:1000:1001:1 /home/ubuntu/ /mnt
u1001@f2-vm:/$ ls -al /home/ubuntu/
total 28
drwxr-xr-x 2 ubuntu ubuntu 4096 Oct 28 22:07 .
drwxr-xr-x 4 root root 4096 Oct 28 04:00 ..
-rw------- 1 ubuntu ubuntu 3154 Oct 28 22:12 .bash_history
-rw-r--r-- 1 ubuntu ubuntu 220 Feb 25 2020 .bash_logout
-rw-r--r-- 1 ubuntu ubuntu 3771 Feb 25 2020 .bashrc
-rw-r--r-- 1 ubuntu ubuntu 807 Feb 25 2020 .profile
-rw-r--r-- 1 ubuntu ubuntu 0 Oct 16 16:11 .sudo_as_admin_successful
-rw------- 1 ubuntu ubuntu 1144 Oct 28 00:43 .viminfo
u1001@f2-vm:/$ ls -al /mnt/
total 28
drwxr-xr-x 2 u1001 u1001 4096 Oct 28 22:07 .
drwxr-xr-x 29 root root 4096 Oct 28 22:01 ..
-rw------- 1 u1001 u1001 3154 Oct 28 22:12 .bash_history
-rw-r--r-- 1 u1001 u1001 220 Feb 25 2020 .bash_logout
-rw-r--r-- 1 u1001 u1001 3771 Feb 25 2020 .bashrc
-rw-r--r-- 1 u1001 u1001 807 Feb 25 2020 .profile
-rw-r--r-- 1 u1001 u1001 0 Oct 16 16:11 .sudo_as_admin_successful
-rw------- 1 u1001 u1001 1144 Oct 28 00:43 .viminfo
u1001@f2-vm:/$ touch /mnt/my-file
u1001@f2-vm:/$ setfacl -m u:1001:rwx /mnt/my-file
u1001@f2-vm:/$ sudo setcap -n 1001 cap_net_raw+ep /mnt/my-file
u1001@f2-vm:/$ ls -al /mnt/my-file
-rw-rwxr--+ 1 u1001 u1001 0 Oct 28 22:14 /mnt/my-file
u1001@f2-vm:/$ ls -al /home/ubuntu/my-file
-rw-rwxr--+ 1 ubuntu ubuntu 0 Oct 28 22:14 /home/ubuntu/my-file
u1001@f2-vm:/$ getfacl /mnt/my-file
getfacl: Removing leading '/' from absolute path names
# file: mnt/my-file
# owner: u1001
# group: u1001
user::rw-
user:u1001:rwx
group::rw-
mask::rwx
other::r--
u1001@f2-vm:/$ getfacl /home/ubuntu/my-file
getfacl: Removing leading '/' from absolute path names
# file: home/ubuntu/my-file
# owner: ubuntu
# group: ubuntu
user::rw-
user:ubuntu:rwx
group::rw-
mask::rwx
other::r--"
* tag 'idmapped-mounts-v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: (41 commits)
xfs: remove the possibly unused mp variable in xfs_file_compat_ioctl
xfs: support idmapped mounts
ext4: support idmapped mounts
fat: handle idmapped mounts
tests: add mount_setattr() selftests
fs: introduce MOUNT_ATTR_IDMAP
fs: add mount_setattr()
fs: add attr_flags_to_mnt_flags helper
fs: split out functions to hold writers
namespace: only take read lock in do_reconfigure_mnt()
mount: make {lock,unlock}_mount_hash() static
namespace: take lock_mount_hash() directly when changing flags
nfs: do not export idmapped mounts
overlayfs: do not mount on top of idmapped mounts
ecryptfs: do not mount on top of idmapped mounts
ima: handle idmapped mounts
apparmor: handle idmapped mounts
fs: make helpers idmap mount aware
exec: handle idmapped mounts
would_dump: handle idmapped mounts
...
741 lines
19 KiB
C
741 lines
19 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* NFS server file handle treatment.
|
|
*
|
|
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
|
|
* Portions Copyright (C) 1999 G. Allen Morris III <gam3@acm.org>
|
|
* Extensive rewrite by Neil Brown <neilb@cse.unsw.edu.au> Southern-Spring 1999
|
|
* ... and again Southern-Winter 2001 to support export_operations
|
|
*/
|
|
|
|
#include <linux/exportfs.h>
|
|
|
|
#include <linux/sunrpc/svcauth_gss.h>
|
|
#include "nfsd.h"
|
|
#include "vfs.h"
|
|
#include "auth.h"
|
|
#include "trace.h"
|
|
|
|
#define NFSDDBG_FACILITY NFSDDBG_FH
|
|
|
|
|
|
/*
|
|
* our acceptability function.
|
|
* if NOSUBTREECHECK, accept anything
|
|
* if not, require that we can walk up to exp->ex_dentry
|
|
* doing some checks on the 'x' bits
|
|
*/
|
|
static int nfsd_acceptable(void *expv, struct dentry *dentry)
|
|
{
|
|
struct svc_export *exp = expv;
|
|
int rv;
|
|
struct dentry *tdentry;
|
|
struct dentry *parent;
|
|
|
|
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
|
|
return 1;
|
|
|
|
tdentry = dget(dentry);
|
|
while (tdentry != exp->ex_path.dentry && !IS_ROOT(tdentry)) {
|
|
/* make sure parents give x permission to user */
|
|
int err;
|
|
parent = dget_parent(tdentry);
|
|
err = inode_permission(&init_user_ns,
|
|
d_inode(parent), MAY_EXEC);
|
|
if (err < 0) {
|
|
dput(parent);
|
|
break;
|
|
}
|
|
dput(tdentry);
|
|
tdentry = parent;
|
|
}
|
|
if (tdentry != exp->ex_path.dentry)
|
|
dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry);
|
|
rv = (tdentry == exp->ex_path.dentry);
|
|
dput(tdentry);
|
|
return rv;
|
|
}
|
|
|
|
/* Type check. The correct error return for type mismatches does not seem to be
|
|
* generally agreed upon. SunOS seems to use EISDIR if file isn't S_IFREG; a
|
|
* comment in the NFSv3 spec says this is incorrect (implementation notes for
|
|
* the write call).
|
|
*/
|
|
static inline __be32
|
|
nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
|
|
umode_t requested)
|
|
{
|
|
umode_t mode = d_inode(dentry)->i_mode & S_IFMT;
|
|
|
|
if (requested == 0) /* the caller doesn't care */
|
|
return nfs_ok;
|
|
if (mode == requested) {
|
|
if (mode == S_IFDIR && !d_can_lookup(dentry)) {
|
|
WARN_ON_ONCE(1);
|
|
return nfserr_notdir;
|
|
}
|
|
return nfs_ok;
|
|
}
|
|
/*
|
|
* v4 has an error more specific than err_notdir which we should
|
|
* return in preference to err_notdir:
|
|
*/
|
|
if (rqstp->rq_vers == 4 && mode == S_IFLNK)
|
|
return nfserr_symlink;
|
|
if (requested == S_IFDIR)
|
|
return nfserr_notdir;
|
|
if (mode == S_IFDIR)
|
|
return nfserr_isdir;
|
|
return nfserr_inval;
|
|
}
|
|
|
|
static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags)
|
|
{
|
|
if (flags & NFSEXP_INSECURE_PORT)
|
|
return true;
|
|
/* We don't require gss requests to use low ports: */
|
|
if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS)
|
|
return true;
|
|
return test_bit(RQ_SECURE, &rqstp->rq_flags);
|
|
}
|
|
|
|
static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
|
|
struct svc_export *exp)
|
|
{
|
|
int flags = nfsexp_flags(rqstp, exp);
|
|
|
|
/* Check if the request originated from a secure port. */
|
|
if (!nfsd_originating_port_ok(rqstp, flags)) {
|
|
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
|
|
dprintk("nfsd: request from insecure port %s!\n",
|
|
svc_print_addr(rqstp, buf, sizeof(buf)));
|
|
return nfserr_perm;
|
|
}
|
|
|
|
/* Set user creds for this exportpoint */
|
|
return nfserrno(nfsd_setuser(rqstp, exp));
|
|
}
|
|
|
|
static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
|
|
struct dentry *dentry, struct svc_export *exp)
|
|
{
|
|
if (!(exp->ex_flags & NFSEXP_V4ROOT))
|
|
return nfs_ok;
|
|
/*
|
|
* v2/v3 clients have no need for the V4ROOT export--they use
|
|
* the mount protocl instead; also, further V4ROOT checks may be
|
|
* in v4-specific code, in which case v2/v3 clients could bypass
|
|
* them.
|
|
*/
|
|
if (!nfsd_v4client(rqstp))
|
|
return nfserr_stale;
|
|
/*
|
|
* We're exposing only the directories and symlinks that have to be
|
|
* traversed on the way to real exports:
|
|
*/
|
|
if (unlikely(!d_is_dir(dentry) &&
|
|
!d_is_symlink(dentry)))
|
|
return nfserr_stale;
|
|
/*
|
|
* A pseudoroot export gives permission to access only one
|
|
* single directory; the kernel has to make another upcall
|
|
* before granting access to anything else under it:
|
|
*/
|
|
if (unlikely(dentry != exp->ex_path.dentry))
|
|
return nfserr_stale;
|
|
return nfs_ok;
|
|
}
|
|
|
|
/*
|
|
* Use the given filehandle to look up the corresponding export and
|
|
* dentry. On success, the results are used to set fh_export and
|
|
* fh_dentry.
|
|
*/
|
|
static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
|
|
{
|
|
struct knfsd_fh *fh = &fhp->fh_handle;
|
|
struct fid *fid = NULL, sfid;
|
|
struct svc_export *exp;
|
|
struct dentry *dentry;
|
|
int fileid_type;
|
|
int data_left = fh->fh_size/4;
|
|
__be32 error;
|
|
|
|
error = nfserr_stale;
|
|
if (rqstp->rq_vers > 2)
|
|
error = nfserr_badhandle;
|
|
if (rqstp->rq_vers == 4 && fh->fh_size == 0)
|
|
return nfserr_nofilehandle;
|
|
|
|
if (fh->fh_version == 1) {
|
|
int len;
|
|
|
|
if (--data_left < 0)
|
|
return error;
|
|
if (fh->fh_auth_type != 0)
|
|
return error;
|
|
len = key_len(fh->fh_fsid_type) / 4;
|
|
if (len == 0)
|
|
return error;
|
|
if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
|
|
/* deprecated, convert to type 3 */
|
|
len = key_len(FSID_ENCODE_DEV)/4;
|
|
fh->fh_fsid_type = FSID_ENCODE_DEV;
|
|
/*
|
|
* struct knfsd_fh uses host-endian fields, which are
|
|
* sometimes used to hold net-endian values. This
|
|
* confuses sparse, so we must use __force here to
|
|
* keep it from complaining.
|
|
*/
|
|
fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
|
|
ntohl((__force __be32)fh->fh_fsid[1])));
|
|
fh->fh_fsid[1] = fh->fh_fsid[2];
|
|
}
|
|
data_left -= len;
|
|
if (data_left < 0)
|
|
return error;
|
|
exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
|
|
fid = (struct fid *)(fh->fh_fsid + len);
|
|
} else {
|
|
__u32 tfh[2];
|
|
dev_t xdev;
|
|
ino_t xino;
|
|
|
|
if (fh->fh_size != NFS_FHSIZE)
|
|
return error;
|
|
/* assume old filehandle format */
|
|
xdev = old_decode_dev(fh->ofh_xdev);
|
|
xino = u32_to_ino_t(fh->ofh_xino);
|
|
mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
|
|
exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
|
|
}
|
|
|
|
error = nfserr_stale;
|
|
if (IS_ERR(exp)) {
|
|
trace_nfsd_set_fh_dentry_badexport(rqstp, fhp, PTR_ERR(exp));
|
|
|
|
if (PTR_ERR(exp) == -ENOENT)
|
|
return error;
|
|
|
|
return nfserrno(PTR_ERR(exp));
|
|
}
|
|
|
|
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
|
|
/* Elevate privileges so that the lack of 'r' or 'x'
|
|
* permission on some parent directory will
|
|
* not stop exportfs_decode_fh from being able
|
|
* to reconnect a directory into the dentry cache.
|
|
* The same problem can affect "SUBTREECHECK" exports,
|
|
* but as nfsd_acceptable depends on correct
|
|
* access control settings being in effect, we cannot
|
|
* fix that case easily.
|
|
*/
|
|
struct cred *new = prepare_creds();
|
|
if (!new) {
|
|
error = nfserrno(-ENOMEM);
|
|
goto out;
|
|
}
|
|
new->cap_effective =
|
|
cap_raise_nfsd_set(new->cap_effective,
|
|
new->cap_permitted);
|
|
put_cred(override_creds(new));
|
|
put_cred(new);
|
|
} else {
|
|
error = nfsd_setuser_and_check_port(rqstp, exp);
|
|
if (error)
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Look up the dentry using the NFS file handle.
|
|
*/
|
|
error = nfserr_stale;
|
|
if (rqstp->rq_vers > 2)
|
|
error = nfserr_badhandle;
|
|
|
|
if (fh->fh_version != 1) {
|
|
sfid.i32.ino = fh->ofh_ino;
|
|
sfid.i32.gen = fh->ofh_generation;
|
|
sfid.i32.parent_ino = fh->ofh_dirino;
|
|
fid = &sfid;
|
|
data_left = 3;
|
|
if (fh->ofh_dirino == 0)
|
|
fileid_type = FILEID_INO32_GEN;
|
|
else
|
|
fileid_type = FILEID_INO32_GEN_PARENT;
|
|
} else
|
|
fileid_type = fh->fh_fileid_type;
|
|
|
|
if (fileid_type == FILEID_ROOT)
|
|
dentry = dget(exp->ex_path.dentry);
|
|
else {
|
|
dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
|
|
data_left, fileid_type,
|
|
nfsd_acceptable, exp);
|
|
if (IS_ERR_OR_NULL(dentry)) {
|
|
trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
|
|
dentry ? PTR_ERR(dentry) : -ESTALE);
|
|
switch (PTR_ERR(dentry)) {
|
|
case -ENOMEM:
|
|
case -ETIMEDOUT:
|
|
break;
|
|
default:
|
|
dentry = ERR_PTR(-ESTALE);
|
|
}
|
|
}
|
|
}
|
|
if (dentry == NULL)
|
|
goto out;
|
|
if (IS_ERR(dentry)) {
|
|
if (PTR_ERR(dentry) != -EINVAL)
|
|
error = nfserrno(PTR_ERR(dentry));
|
|
goto out;
|
|
}
|
|
|
|
if (d_is_dir(dentry) &&
|
|
(dentry->d_flags & DCACHE_DISCONNECTED)) {
|
|
printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n",
|
|
dentry);
|
|
}
|
|
|
|
fhp->fh_dentry = dentry;
|
|
fhp->fh_export = exp;
|
|
|
|
switch (rqstp->rq_vers) {
|
|
case 4:
|
|
if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR)
|
|
fhp->fh_no_atomic_attr = true;
|
|
break;
|
|
case 3:
|
|
if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC)
|
|
fhp->fh_no_wcc = true;
|
|
break;
|
|
case 2:
|
|
fhp->fh_no_wcc = true;
|
|
}
|
|
|
|
return 0;
|
|
out:
|
|
exp_put(exp);
|
|
return error;
|
|
}
|
|
|
|
/**
|
|
* fh_verify - filehandle lookup and access checking
|
|
* @rqstp: pointer to current rpc request
|
|
* @fhp: filehandle to be verified
|
|
* @type: expected type of object pointed to by filehandle
|
|
* @access: type of access needed to object
|
|
*
|
|
* Look up a dentry from the on-the-wire filehandle, check the client's
|
|
* access to the export, and set the current task's credentials.
|
|
*
|
|
* Regardless of success or failure of fh_verify(), fh_put() should be
|
|
* called on @fhp when the caller is finished with the filehandle.
|
|
*
|
|
* fh_verify() may be called multiple times on a given filehandle, for
|
|
* example, when processing an NFSv4 compound. The first call will look
|
|
* up a dentry using the on-the-wire filehandle. Subsequent calls will
|
|
* skip the lookup and just perform the other checks and possibly change
|
|
* the current task's credentials.
|
|
*
|
|
* @type specifies the type of object expected using one of the S_IF*
|
|
* constants defined in include/linux/stat.h. The caller may use zero
|
|
* to indicate that it doesn't care, or a negative integer to indicate
|
|
* that it expects something not of the given type.
|
|
*
|
|
* @access is formed from the NFSD_MAY_* constants defined in
|
|
* fs/nfsd/vfs.h.
|
|
*/
|
|
__be32
|
|
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
|
|
{
|
|
struct svc_export *exp = NULL;
|
|
struct dentry *dentry;
|
|
__be32 error;
|
|
|
|
dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
|
|
|
|
if (!fhp->fh_dentry) {
|
|
error = nfsd_set_fh_dentry(rqstp, fhp);
|
|
if (error)
|
|
goto out;
|
|
}
|
|
dentry = fhp->fh_dentry;
|
|
exp = fhp->fh_export;
|
|
/*
|
|
* We still have to do all these permission checks, even when
|
|
* fh_dentry is already set:
|
|
* - fh_verify may be called multiple times with different
|
|
* "access" arguments (e.g. nfsd_proc_create calls
|
|
* fh_verify(...,NFSD_MAY_EXEC) first, then later (in
|
|
* nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE).
|
|
* - in the NFSv4 case, the filehandle may have been filled
|
|
* in by fh_compose, and given a dentry, but further
|
|
* compound operations performed with that filehandle
|
|
* still need permissions checks. In the worst case, a
|
|
* mountpoint crossing may have changed the export
|
|
* options, and we may now need to use a different uid
|
|
* (for example, if different id-squashing options are in
|
|
* effect on the new filesystem).
|
|
*/
|
|
error = check_pseudo_root(rqstp, dentry, exp);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = nfsd_setuser_and_check_port(rqstp, exp);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = nfsd_mode_check(rqstp, dentry, type);
|
|
if (error)
|
|
goto out;
|
|
|
|
/*
|
|
* pseudoflavor restrictions are not enforced on NLM,
|
|
* which clients virtually always use auth_sys for,
|
|
* even while using RPCSEC_GSS for NFS.
|
|
*/
|
|
if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
|
|
goto skip_pseudoflavor_check;
|
|
/*
|
|
* Clients may expect to be able to use auth_sys during mount,
|
|
* even if they use gss for everything else; see section 2.3.2
|
|
* of rfc 2623.
|
|
*/
|
|
if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT
|
|
&& exp->ex_path.dentry == dentry)
|
|
goto skip_pseudoflavor_check;
|
|
|
|
error = check_nfsd_access(exp, rqstp);
|
|
if (error)
|
|
goto out;
|
|
|
|
skip_pseudoflavor_check:
|
|
/* Finally, check access permissions. */
|
|
error = nfsd_permission(rqstp, exp, dentry, access);
|
|
|
|
if (error) {
|
|
dprintk("fh_verify: %pd2 permission failure, "
|
|
"acc=%x, error=%d\n",
|
|
dentry,
|
|
access, ntohl(error));
|
|
}
|
|
out:
|
|
if (error == nfserr_stale)
|
|
nfsd_stats_fh_stale_inc(exp);
|
|
return error;
|
|
}
|
|
|
|
|
|
/*
|
|
* Compose a file handle for an NFS reply.
|
|
*
|
|
* Note that when first composed, the dentry may not yet have
|
|
* an inode. In this case a call to fh_update should be made
|
|
* before the fh goes out on the wire ...
|
|
*/
|
|
static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
|
|
struct dentry *dentry)
|
|
{
|
|
if (dentry != exp->ex_path.dentry) {
|
|
struct fid *fid = (struct fid *)
|
|
(fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
|
|
int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
|
|
int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
|
|
|
|
fhp->fh_handle.fh_fileid_type =
|
|
exportfs_encode_fh(dentry, fid, &maxsize, subtreecheck);
|
|
fhp->fh_handle.fh_size += maxsize * 4;
|
|
} else {
|
|
fhp->fh_handle.fh_fileid_type = FILEID_ROOT;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* for composing old style file handles
|
|
*/
|
|
static inline void _fh_update_old(struct dentry *dentry,
|
|
struct svc_export *exp,
|
|
struct knfsd_fh *fh)
|
|
{
|
|
fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino);
|
|
fh->ofh_generation = d_inode(dentry)->i_generation;
|
|
if (d_is_dir(dentry) ||
|
|
(exp->ex_flags & NFSEXP_NOSUBTREECHECK))
|
|
fh->ofh_dirino = 0;
|
|
}
|
|
|
|
static bool is_root_export(struct svc_export *exp)
|
|
{
|
|
return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
|
|
}
|
|
|
|
static struct super_block *exp_sb(struct svc_export *exp)
|
|
{
|
|
return exp->ex_path.dentry->d_sb;
|
|
}
|
|
|
|
/*
 * Check whether @fsid_type can be used to identify export @exp.
 *
 *  - device based types need a filesystem with FS_REQUIRES_DEV (and
 *    FSID_DEV additionally needs a device number old_valid_dev() accepts)
 *  - FSID_NUM needs the export to carry NFSEXP_FSID
 *  - UUID based types need the export to have a uuid (and FSID_UUID8 /
 *    FSID_UUID16 additionally need a root export)
 *
 * Any other type is accepted.
 */
static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
{
	switch (fsid_type) {
	case FSID_DEV:
	case FSID_MAJOR_MINOR:
	case FSID_ENCODE_DEV:
		if (fsid_type == FSID_DEV &&
		    !old_valid_dev(exp_sb(exp)->s_dev))
			return false;
		return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
	case FSID_NUM:
		return exp->ex_flags & NFSEXP_FSID;
	case FSID_UUID8:
	case FSID_UUID16:
	case FSID_UUID4_INUM:
	case FSID_UUID16_INUM:
		if ((fsid_type == FSID_UUID8 || fsid_type == FSID_UUID16) &&
		    !is_root_export(exp))
			return false;
		return exp->ex_uuid != NULL;
	default:
		return true;
	}
}
|
|
|
|
|
|
static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh)
|
|
{
|
|
u8 version;
|
|
u8 fsid_type;
|
|
retry:
|
|
version = 1;
|
|
if (ref_fh && ref_fh->fh_export == exp) {
|
|
version = ref_fh->fh_handle.fh_version;
|
|
fsid_type = ref_fh->fh_handle.fh_fsid_type;
|
|
|
|
ref_fh = NULL;
|
|
|
|
switch (version) {
|
|
case 0xca:
|
|
fsid_type = FSID_DEV;
|
|
break;
|
|
case 1:
|
|
break;
|
|
default:
|
|
goto retry;
|
|
}
|
|
|
|
/*
|
|
* As the fsid -> filesystem mapping was guided by
|
|
* user-space, there is no guarantee that the filesystem
|
|
* actually supports that fsid type. If it doesn't we
|
|
* loop around again without ref_fh set.
|
|
*/
|
|
if (!fsid_type_ok_for_exp(fsid_type, exp))
|
|
goto retry;
|
|
} else if (exp->ex_flags & NFSEXP_FSID) {
|
|
fsid_type = FSID_NUM;
|
|
} else if (exp->ex_uuid) {
|
|
if (fhp->fh_maxsize >= 64) {
|
|
if (is_root_export(exp))
|
|
fsid_type = FSID_UUID16;
|
|
else
|
|
fsid_type = FSID_UUID16_INUM;
|
|
} else {
|
|
if (is_root_export(exp))
|
|
fsid_type = FSID_UUID8;
|
|
else
|
|
fsid_type = FSID_UUID4_INUM;
|
|
}
|
|
} else if (!old_valid_dev(exp_sb(exp)->s_dev))
|
|
/* for newer device numbers, we must use a newer fsid format */
|
|
fsid_type = FSID_ENCODE_DEV;
|
|
else
|
|
fsid_type = FSID_DEV;
|
|
fhp->fh_handle.fh_version = version;
|
|
if (version)
|
|
fhp->fh_handle.fh_fsid_type = fsid_type;
|
|
}
|
|
|
|
__be32
|
|
fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
|
|
struct svc_fh *ref_fh)
|
|
{
|
|
/* ref_fh is a reference file handle.
|
|
* if it is non-null and for the same filesystem, then we should compose
|
|
* a filehandle which is of the same version, where possible.
|
|
* Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
|
|
* Then create a 32byte filehandle using nfs_fhbase_old
|
|
*
|
|
*/
|
|
|
|
struct inode * inode = d_inode(dentry);
|
|
dev_t ex_dev = exp_sb(exp)->s_dev;
|
|
|
|
dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
|
|
MAJOR(ex_dev), MINOR(ex_dev),
|
|
(long) d_inode(exp->ex_path.dentry)->i_ino,
|
|
dentry,
|
|
(inode ? inode->i_ino : 0));
|
|
|
|
/* Choose filehandle version and fsid type based on
|
|
* the reference filehandle (if it is in the same export)
|
|
* or the export options.
|
|
*/
|
|
set_version_and_fsid_type(fhp, exp, ref_fh);
|
|
|
|
/* If we have a ref_fh, then copy the fh_no_wcc setting from it. */
|
|
fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false;
|
|
|
|
if (ref_fh == fhp)
|
|
fh_put(ref_fh);
|
|
|
|
if (fhp->fh_locked || fhp->fh_dentry) {
|
|
printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
|
|
dentry);
|
|
}
|
|
if (fhp->fh_maxsize < NFS_FHSIZE)
|
|
printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n",
|
|
fhp->fh_maxsize,
|
|
dentry);
|
|
|
|
fhp->fh_dentry = dget(dentry); /* our internal copy */
|
|
fhp->fh_export = exp_get(exp);
|
|
|
|
if (fhp->fh_handle.fh_version == 0xca) {
|
|
/* old style filehandle please */
|
|
memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
|
|
fhp->fh_handle.fh_size = NFS_FHSIZE;
|
|
fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
|
|
fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev);
|
|
fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
|
|
fhp->fh_handle.ofh_xino =
|
|
ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino);
|
|
fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
|
|
if (inode)
|
|
_fh_update_old(dentry, exp, &fhp->fh_handle);
|
|
} else {
|
|
fhp->fh_handle.fh_size =
|
|
key_len(fhp->fh_handle.fh_fsid_type) + 4;
|
|
fhp->fh_handle.fh_auth_type = 0;
|
|
|
|
mk_fsid(fhp->fh_handle.fh_fsid_type,
|
|
fhp->fh_handle.fh_fsid,
|
|
ex_dev,
|
|
d_inode(exp->ex_path.dentry)->i_ino,
|
|
exp->ex_fsid, exp->ex_uuid);
|
|
|
|
if (inode)
|
|
_fh_update(fhp, exp, dentry);
|
|
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
|
|
fh_put(fhp);
|
|
return nfserr_opnotsupp;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Update file handle information after changing a dentry.
|
|
* This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create
|
|
*/
|
|
__be32
|
|
fh_update(struct svc_fh *fhp)
|
|
{
|
|
struct dentry *dentry;
|
|
|
|
if (!fhp->fh_dentry)
|
|
goto out_bad;
|
|
|
|
dentry = fhp->fh_dentry;
|
|
if (d_really_is_negative(dentry))
|
|
goto out_negative;
|
|
if (fhp->fh_handle.fh_version != 1) {
|
|
_fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
|
|
} else {
|
|
if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
|
|
return 0;
|
|
|
|
_fh_update(fhp, fhp->fh_export, dentry);
|
|
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
|
|
return nfserr_opnotsupp;
|
|
}
|
|
return 0;
|
|
out_bad:
|
|
printk(KERN_ERR "fh_update: fh not verified!\n");
|
|
return nfserr_serverfault;
|
|
out_negative:
|
|
printk(KERN_ERR "fh_update: %pd2 still negative!\n",
|
|
dentry);
|
|
return nfserr_serverfault;
|
|
}
|
|
|
|
/*
|
|
* Release a file handle.
|
|
*/
|
|
void
|
|
fh_put(struct svc_fh *fhp)
|
|
{
|
|
struct dentry * dentry = fhp->fh_dentry;
|
|
struct svc_export * exp = fhp->fh_export;
|
|
if (dentry) {
|
|
fh_unlock(fhp);
|
|
fhp->fh_dentry = NULL;
|
|
dput(dentry);
|
|
fh_clear_wcc(fhp);
|
|
}
|
|
fh_drop_write(fhp);
|
|
if (exp) {
|
|
exp_put(exp);
|
|
fhp->fh_export = NULL;
|
|
}
|
|
fhp->fh_no_wcc = false;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Shorthand for dprintk()'s
|
|
*/
|
|
char * SVCFH_fmt(struct svc_fh *fhp)
|
|
{
|
|
struct knfsd_fh *fh = &fhp->fh_handle;
|
|
|
|
static char buf[80];
|
|
sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
|
|
fh->fh_size,
|
|
fh->fh_base.fh_pad[0],
|
|
fh->fh_base.fh_pad[1],
|
|
fh->fh_base.fh_pad[2],
|
|
fh->fh_base.fh_pad[3],
|
|
fh->fh_base.fh_pad[4],
|
|
fh->fh_base.fh_pad[5]);
|
|
return buf;
|
|
}
|
|
|
|
enum fsid_source fsid_source(struct svc_fh *fhp)
|
|
{
|
|
if (fhp->fh_handle.fh_version != 1)
|
|
return FSIDSOURCE_DEV;
|
|
switch(fhp->fh_handle.fh_fsid_type) {
|
|
case FSID_DEV:
|
|
case FSID_ENCODE_DEV:
|
|
case FSID_MAJOR_MINOR:
|
|
if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV)
|
|
return FSIDSOURCE_DEV;
|
|
break;
|
|
case FSID_NUM:
|
|
if (fhp->fh_export->ex_flags & NFSEXP_FSID)
|
|
return FSIDSOURCE_FSID;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
/* either a UUID type filehandle, or the filehandle doesn't
|
|
* match the export.
|
|
*/
|
|
if (fhp->fh_export->ex_flags & NFSEXP_FSID)
|
|
return FSIDSOURCE_FSID;
|
|
if (fhp->fh_export->ex_uuid)
|
|
return FSIDSOURCE_UUID;
|
|
return FSIDSOURCE_DEV;
|
|
}
|