This is the second round of ext4 commits for 5.8 merge window. It

includes the per-inode DAX support, which was dependant on the DAX
 infrastructure which came in via the XFS tree, and a number of
 regression and bug fixes; most notably the "BUG: using
 smp_processor_id() in preemptible code in ext4_mb_new_blocks" reported
 by syzkaller.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAl7mgCcACgkQ8vlZVpUN
 gaPftwf8C4w/7SG+CYLdwg0d2u9TKk77yDuWaioFHOcMSjZvG4TCSgtMhZxQnyty
 9t4yqacILx12pCj/mZnrZp5BOSn9O2ZbuDoXNKNrFXU0BF+CsbnhvJvrrh1j/MUa
 PPtcqyGFdOLSDvHSD9xPVT76juwh79aR8vB7qnQXaEO5wcLodZWoqBEFSKCl6Bo8
 hjXs1EXidusKsoarQxW6mEITmnhU2S2fuCVDgVcoM/LmKwzbgqvlWrentq9u8qLH
 W+XbjWgUtCM1byeDZWqe5FYyyJ8x+dTv7H5an3KR92EN6hKo5AOvzA0I41pZscq/
 bJ9p2THDxJQX4rJBevGAS5mZ6hTkRw==
 =z6eO
 -----END PGP SIGNATURE-----

Merge tag 'ext4-for-linus-5.8-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull more ext4 updates from Ted Ts'o:
 "This is the second round of ext4 commits for 5.8 merge window [1].

  It includes the per-inode DAX support, which was dependant on the DAX
  infrastructure which came in via the XFS tree, and a number of
  regression and bug fixes; most notably the "BUG: using
  smp_processor_id() in preemptible code in ext4_mb_new_blocks" reported
  by syzkaller"

[1] The pull request actually came in 15 minutes after I had tagged the
    rc1 release. Tssk, tssk, late..   - Linus

* tag 'ext4-for-linus-5.8-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4, jbd2: ensure panic by fix a race between jbd2 abort and ext4 error handlers
  ext4: support xattr gnu.* namespace for the Hurd
  ext4: mballoc: Use this_cpu_read instead of this_cpu_ptr
  ext4: avoid utf8_strncasecmp() with unstable name
  ext4: stop overwrite the errcode in ext4_setup_super
  ext4: fix partial cluster initialization when splitting extent
  ext4: avoid race conditions when remounting with options that change dax
  Documentation/dax: Update DAX enablement for ext4
  fs/ext4: Introduce DAX inode flag
  fs/ext4: Remove jflag variable
  fs/ext4: Make DAX mount option a tri-state
  fs/ext4: Only change S_DAX on inode load
  fs/ext4: Update ext4_should_use_dax()
  fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS
  fs/ext4: Disallow verity if inode is DAX
  fs/ext4: Narrow scope of DAX check in setflags
This commit is contained in:
Linus Torvalds 2020-06-15 09:32:10 -07:00
commit 3be20b6fc1
19 changed files with 290 additions and 73 deletions

View File

@ -25,7 +25,7 @@ size when creating the filesystem.
Currently 3 filesystems support DAX: ext2, ext4 and xfs. Enabling DAX on them
is different.
Enabling DAX on ext4 and ext2
Enabling DAX on ext2
-----------------------------
When mounting the filesystem, use the "-o dax" option on the command line or
@ -33,8 +33,8 @@ add 'dax' to the options in /etc/fstab. This works to enable DAX on all files
within the filesystem. It is equivalent to the '-o dax=always' behavior below.
Enabling DAX on xfs
-------------------
Enabling DAX on xfs and ext4
----------------------------
Summary
-------

View File

@ -39,3 +39,6 @@ is encrypted as well as the data itself.
Verity files cannot have blocks allocated past the end of the verity
metadata.
Verity and DAX are not compatible and attempts to set both of these flags
on a file will fail.

View File

@ -9,7 +9,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \
indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \
mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o
super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \
xattr_user.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o

View File

@ -675,6 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
struct qstr qstr = {.name = str, .len = len };
const struct dentry *parent = READ_ONCE(dentry->d_parent);
const struct inode *inode = READ_ONCE(parent->d_inode);
char strbuf[DNAME_INLINE_LEN];
if (!inode || !IS_CASEFOLDED(inode) ||
!EXT4_SB(inode->i_sb)->s_encoding) {
@ -683,6 +684,21 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
return memcmp(str, name->name, len);
}
/*
* If the dentry name is stored in-line, then it may be concurrently
* modified by a rename. If this happens, the VFS will eventually retry
* the lookup, so it doesn't matter what ->d_compare() returns.
* However, it's unsafe to call utf8_strncasecmp() with an unstable
* string. Therefore, we have to copy the name into a temporary buffer.
*/
if (len <= DNAME_INLINE_LEN - 1) {
memcpy(strbuf, str, len);
strbuf[len] = 0;
qstr.name = strbuf;
/* prevent compiler from optimizing out the temporary buffer */
barrier();
}
return ext4_ci_compare(inode, name, &qstr, false);
}

View File

@ -426,13 +426,16 @@ struct flex_groups {
#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
/* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */
#define EXT4_DAX_FL 0x02000000 /* Inode is DAX */
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
@ -440,14 +443,16 @@ struct flex_groups {
EXT4_APPEND_FL | \
EXT4_NODUMP_FL | \
EXT4_NOATIME_FL | \
EXT4_PROJINHERIT_FL)
EXT4_PROJINHERIT_FL | \
EXT4_DAX_FL)
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
EXT4_DAX_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
@ -459,6 +464,10 @@ struct flex_groups {
/* The only flags that should be swapped */
#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
/* Flags which are mutually exclusive to DAX */
#define EXT4_DAX_MUT_EXCL (EXT4_VERITY_FL | EXT4_ENCRYPT_FL |\
EXT4_JOURNAL_DATA_FL)
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{
@ -499,6 +508,7 @@ enum {
EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
/* 22 was formerly EXT4_INODE_EOFBLOCKS */
EXT4_INODE_DAX = 25, /* Inode is DAX */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */
EXT4_INODE_CASEFOLD = 30, /* Casefolded directory */
@ -1135,9 +1145,9 @@ struct ext4_inode_info {
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#ifdef CONFIG_FS_DAX
#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
#define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */
#else
#define EXT4_MOUNT_DAX 0
#define EXT4_MOUNT_DAX_ALWAYS 0
#endif
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
@ -1180,6 +1190,8 @@ struct ext4_inode_info {
blocks */
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */
#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */
#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
@ -1992,6 +2004,7 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
*/
#define EXT4_FLAGS_RESIZING 0
#define EXT4_FLAGS_SHUTDOWN 1
#define EXT4_FLAGS_BDEV_IS_DAX 2
static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
{
@ -2705,7 +2718,7 @@ extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);

View File

@ -2844,7 +2844,7 @@ again:
* in use to avoid freeing it when removing blocks.
*/
if (sbi->s_cluster_ratio > 1) {
pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
partial.pclu = EXT4_B2C(sbi, pblk);
partial.state = nofree;
}

View File

@ -1116,7 +1116,7 @@ got:
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;
ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, true);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) {

View File

@ -4403,9 +4403,11 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}
static bool ext4_should_use_dax(struct inode *inode)
static bool ext4_should_enable_dax(struct inode *inode)
{
if (!test_opt(inode->i_sb, DAX))
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
if (test_opt2(inode->i_sb, DAX_NEVER))
return false;
if (!S_ISREG(inode->i_mode))
return false;
@ -4417,14 +4419,21 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
return false;
if (test_opt(inode->i_sb, DAX_ALWAYS))
return true;
return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
}
void ext4_set_inode_flags(struct inode *inode)
void ext4_set_inode_flags(struct inode *inode, bool init)
{
unsigned int flags = EXT4_I(inode)->i_flags;
unsigned int new_fl = 0;
WARN_ON_ONCE(IS_DAX(inode) && init);
if (flags & EXT4_SYNC_FL)
new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
@ -4435,8 +4444,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
if (ext4_should_use_dax(inode))
/* Because of the way inode_set_flags() works we must preserve S_DAX
* here if already set. */
new_fl |= (inode->i_flags & S_DAX);
if (init && ext4_should_enable_dax(inode))
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
@ -4650,7 +4664,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* not initialized on a new filesystem. */
}
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, true);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (ext4_has_feature_64bit(sb))

View File

@ -292,6 +292,38 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
return 0;
}
static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
{
struct ext4_inode_info *ei = EXT4_I(inode);
if (S_ISDIR(inode->i_mode))
return;
if (test_opt2(inode->i_sb, DAX_NEVER) ||
test_opt(inode->i_sb, DAX_ALWAYS))
return;
if ((ei->i_flags ^ flags) & EXT4_DAX_FL)
d_mark_dontcache(inode);
}
static bool dax_compatible(struct inode *inode, unsigned int oldflags,
unsigned int flags)
{
if (flags & EXT4_DAX_FL) {
if ((oldflags & EXT4_DAX_MUT_EXCL) ||
ext4_test_inode_state(inode,
EXT4_STATE_VERITY_IN_PROGRESS)) {
return false;
}
}
if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL))
return false;
return true;
}
static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
{
@ -300,7 +332,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
int err = -EPERM, migrate = 0;
struct ext4_iloc iloc;
unsigned int oldflags, mask, i;
unsigned int jflag;
struct super_block *sb = inode->i_sb;
/* Is it quota file? Do not allow user to mess with it */
@ -309,9 +340,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
oldflags = ei->i_flags;
/* The JOURNAL_DATA flag is modifiable only by root */
jflag = flags & EXT4_JOURNAL_DATA_FL;
err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
if (err)
goto flags_out;
@ -320,10 +348,16 @@ static int ext4_ioctl_setflags(struct inode *inode,
* The JOURNAL_DATA flag can only be changed by
* the relevant capability.
*/
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if (!capable(CAP_SYS_RESOURCE))
goto flags_out;
}
if (!dax_compatible(inode, oldflags, flags)) {
err = -EOPNOTSUPP;
goto flags_out;
}
if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
migrate = 1;
@ -369,6 +403,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
if (err)
goto flags_err;
ext4_dax_dontcache(inode, flags);
for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
if (!(mask & EXT4_FL_USER_MODIFIABLE))
continue;
@ -381,7 +417,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
ext4_clear_inode_flag(inode, i);
}
ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, false);
inode->i_ctime = current_time(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
@ -390,17 +427,18 @@ flags_err:
if (err)
goto flags_out;
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
/*
* Changes to the journaling mode can cause unsafe changes to
* S_DAX if we are using the DAX mount option.
* S_DAX if the inode is DAX
*/
if (test_opt(inode->i_sb, DAX)) {
if (IS_DAX(inode)) {
err = -EBUSY;
goto flags_out;
}
err = ext4_change_inode_journal_flag(inode, jflag);
err = ext4_change_inode_journal_flag(inode,
flags & EXT4_JOURNAL_DATA_FL);
if (err)
goto flags_out;
}
@ -527,12 +565,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
xflags |= FS_XFLAG_NOATIME;
if (iflags & EXT4_PROJINHERIT_FL)
xflags |= FS_XFLAG_PROJINHERIT;
if (iflags & EXT4_DAX_FL)
xflags |= FS_XFLAG_DAX;
return xflags;
}
#define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \
FS_XFLAG_DAX)
/* Transfer xflags flags to internal */
static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
@ -551,6 +592,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
iflags |= EXT4_NOATIME_FL;
if (xflags & FS_XFLAG_PROJINHERIT)
iflags |= EXT4_PROJINHERIT_FL;
if (xflags & FS_XFLAG_DAX)
iflags |= EXT4_DAX_FL;
return iflags;
}

View File

@ -4708,7 +4708,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
}
ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
seq = *this_cpu_ptr(&discard_pa_seq);
seq = this_cpu_read(discard_pa_seq);
if (!ext4_mb_use_preallocated(ac)) {
ac->ac_op = EXT4_MB_HISTORY_ALLOC;
ext4_mb_normalize_request(ac, ar);

View File

@ -522,9 +522,6 @@ static void ext4_handle_error(struct super_block *sb)
smp_wmb();
sb->s_flags |= SB_RDONLY;
} else if (test_opt(sb, ERRORS_PANIC)) {
if (EXT4_SB(sb)->s_journal &&
!(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
return;
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
@ -725,24 +722,21 @@ void __ext4_abort(struct super_block *sb, const char *function,
va_end(args);
if (sb_rdonly(sb) == 0) {
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible
* before ->s_flags update
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
if (EXT4_SB(sb)->s_journal &&
!(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
return;
if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
panic("EXT4-fs panic from previous error\n");
}
}
void __ext4_msg(struct super_block *sb,
const char *prefix, const char *fmt, ...)
@ -1324,6 +1318,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
return -EINVAL;
if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
return -EOPNOTSUPP;
res = ext4_convert_inline_data(inode);
if (res)
return res;
@ -1349,7 +1346,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
* Update inode->i_flags - S_ENCRYPTED will be enabled,
* S_DAX may be disabled
*/
ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, false);
}
return res;
}
@ -1376,7 +1373,7 @@ retry:
* Update inode->i_flags - S_ENCRYPTED will be enabled,
* S_DAX may be disabled
*/
ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, false);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
@ -1514,7 +1511,8 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
Opt_nowarn_on_error, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@ -1581,6 +1579,9 @@ static const match_table_t tokens = {
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
{Opt_dax, "dax"},
{Opt_dax_always, "dax=always"},
{Opt_dax_inode, "dax=inode"},
{Opt_dax_never, "dax=never"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
{Opt_warn_on_error, "warn_on_error"},
@ -1729,6 +1730,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
#define MOPT_NO_EXT3 0x0200
#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING 0x0400
#define MOPT_SKIP 0x0800
static const struct mount_opts {
int token;
@ -1778,7 +1780,13 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
{Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
{Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
{Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
{Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
{Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@ -2123,13 +2131,56 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
} else if (token == Opt_dax) {
} else if (token == Opt_dax || token == Opt_dax_always ||
token == Opt_dax_inode || token == Opt_dax_never) {
#ifdef CONFIG_FS_DAX
switch (token) {
case Opt_dax:
case Opt_dax_always:
if (is_remount &&
(!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
fail_dax_change_remount:
ext4_msg(sb, KERN_ERR, "can't change "
"dax mount option while remounting");
return -1;
}
if (is_remount &&
(test_opt(sb, DATA_FLAGS) ==
EXT4_MOUNT_JOURNAL_DATA)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
return -1;
}
ext4_msg(sb, KERN_WARNING,
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
sbi->s_mount_opt |= m->mount_opt;
sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
break;
case Opt_dax_never:
if (is_remount &&
(!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS)))
goto fail_dax_change_remount;
sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
break;
case Opt_dax_inode:
if (is_remount &&
((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE)))
goto fail_dax_change_remount;
sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
/* Strictly for printing options */
sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE;
break;
}
#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
return -1;
#endif
} else if (token == Opt_data_err_abort) {
@ -2293,7 +2344,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
for (m = ext4_mount_opts; m->token != Opt_err; m++) {
int want_set = m->flags & MOPT_SET;
if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
(m->flags & MOPT_CLEAR_ERR))
(m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
continue;
if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
continue; /* skip if same as the default */
@ -2353,6 +2404,17 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
fscrypt_show_test_dummy_encryption(seq, sep, sb);
if (test_opt(sb, DAX_ALWAYS)) {
if (IS_EXT2_SB(sb))
SEQ_OPTS_PUTS("dax");
else
SEQ_OPTS_PUTS("dax=always");
} else if (test_opt2(sb, DAX_NEVER)) {
SEQ_OPTS_PUTS("dax=never");
} else if (test_opt2(sb, DAX_INODE)) {
SEQ_OPTS_PUTS("dax=inode");
}
ext4_show_quota_options(seq, sb);
return 0;
}
@ -2383,6 +2445,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_msg(sb, KERN_ERR, "revision level too high, "
"forcing read-only mode");
err = -EROFS;
goto done;
}
if (read_only)
goto done;
@ -4017,7 +4080,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"both data=journal and delalloc");
goto failed_mount;
}
if (test_opt(sb, DAX)) {
if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
goto failed_mount;
@ -4127,13 +4190,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
if (bdev_dax_supported(sb->s_bdev, blocksize))
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
goto failed_mount;
}
if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device.");
goto failed_mount;
@ -5447,12 +5513,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
if (test_opt(sb, DAX)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
err = -EINVAL;
goto restore_opts;
}
} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
@ -5468,12 +5528,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
}
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");

View File

@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp)
handle_t *handle;
int err;
if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX))
return -EINVAL;
if (ext4_verity_in_progress(inode))
return -EBUSY;
@ -241,7 +244,7 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc,
if (err)
goto out_stop;
ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
ext4_set_inode_flags(inode);
ext4_set_inode_flags(inode, false);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
out_stop:

View File

@ -93,6 +93,7 @@ static const struct xattr_handler * const ext4_xattr_handler_map[] = {
#ifdef CONFIG_EXT4_FS_SECURITY
[EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
#endif
[EXT4_XATTR_INDEX_HURD] = &ext4_xattr_hurd_handler,
};
const struct xattr_handler *ext4_xattr_handlers[] = {
@ -105,6 +106,7 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
#ifdef CONFIG_EXT4_FS_SECURITY
&ext4_xattr_security_handler,
#endif
&ext4_xattr_hurd_handler,
NULL
};

View File

@ -124,6 +124,7 @@ struct ext4_xattr_inode_array {
extern const struct xattr_handler ext4_xattr_user_handler;
extern const struct xattr_handler ext4_xattr_trusted_handler;
extern const struct xattr_handler ext4_xattr_security_handler;
extern const struct xattr_handler ext4_xattr_hurd_handler;
#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"

51
fs/ext4/xattr_hurd.c Normal file
View File

@ -0,0 +1,51 @@
// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/xattr_hurd.c
* Handler for extended gnu attributes for the Hurd.
*
* Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
* Copyright (C) 2020 by Jan (janneke) Nieuwenhuizen, <janneke@gnu.org>
*/
#include <linux/init.h>
#include <linux/string.h>
#include "ext4.h"
#include "xattr.h"
static bool
ext4_xattr_hurd_list(struct dentry *dentry)
{
return test_opt(dentry->d_sb, XATTR_USER);
}
static int
ext4_xattr_hurd_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
{
if (!test_opt(inode->i_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext4_xattr_get(inode, EXT4_XATTR_INDEX_HURD,
name, buffer, size);
}
static int
ext4_xattr_hurd_set(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
{
if (!test_opt(inode->i_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext4_xattr_set(inode, EXT4_XATTR_INDEX_HURD,
name, value, size, flags);
}
const struct xattr_handler ext4_xattr_hurd_handler = {
.prefix = XATTR_HURD_PREFIX,
.list = ext4_xattr_hurd_list,
.get = ext4_xattr_hurd_get,
.set = ext4_xattr_hurd_set,
};

View File

@ -1140,6 +1140,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
init_waitqueue_head(&journal->j_wait_commit);
init_waitqueue_head(&journal->j_wait_updates);
init_waitqueue_head(&journal->j_wait_reserved);
mutex_init(&journal->j_abort_mutex);
mutex_init(&journal->j_barrier);
mutex_init(&journal->j_checkpoint_mutex);
spin_lock_init(&journal->j_revoke_lock);
@ -1402,6 +1403,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
printk(KERN_ERR "JBD2: Error %d detected when updating "
"journal superblock for %s.\n", ret,
journal->j_devname);
if (!is_journal_aborted(journal))
jbd2_journal_abort(journal, ret);
}
@ -2153,6 +2155,13 @@ void jbd2_journal_abort(journal_t *journal, int errno)
{
transaction_t *transaction;
/*
* Lock the aborting procedure until everything is done, this avoid
* races between filesystem's error handling flow (e.g. ext4_abort()),
* ensure panic after the error info is written into journal's
* superblock.
*/
mutex_lock(&journal->j_abort_mutex);
/*
* ESHUTDOWN always takes precedence because a file system check
* caused by any other journal abort error is not required after
@ -2167,6 +2176,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
journal->j_errno = errno;
jbd2_journal_update_sb_errno(journal);
}
mutex_unlock(&journal->j_abort_mutex);
return;
}
@ -2188,10 +2198,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
* layer could realise that a filesystem check is needed.
*/
jbd2_journal_update_sb_errno(journal);
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_REC_ERR;
write_unlock(&journal->j_state_lock);
mutex_unlock(&journal->j_abort_mutex);
}
/**

View File

@ -765,6 +765,11 @@ struct journal_s
*/
int j_errno;
/**
* @j_abort_mutex: Lock the whole aborting procedure.
*/
struct mutex j_abort_mutex;
/**
* @j_sb_buffer: The first part of the superblock buffer.
*/
@ -1247,7 +1252,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
* data write error in ordered
* mode */
#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */
/*
* Function declarations for the journaling transaction and buffer

View File

@ -262,6 +262,7 @@ struct fsxattr {
#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#define FS_DAX_FL 0x02000000 /* Inode is DAX */
#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */

View File

@ -7,6 +7,7 @@
Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
Copyright (c) 2020 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
*/
#include <linux/libc-compat.h>
@ -31,6 +32,9 @@
#define XATTR_BTRFS_PREFIX "btrfs."
#define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1)
#define XATTR_HURD_PREFIX "gnu."
#define XATTR_HURD_PREFIX_LEN (sizeof(XATTR_HURD_PREFIX) - 1)
#define XATTR_SECURITY_PREFIX "security."
#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)