From fba27cf005038a5fedf001b49636594819dbb1d5 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Fri, 12 Jan 2024 10:32:58 +0800 Subject: [PATCH 1/7] exfat: drop ->i_size_ondisk ->i_size_ondisk is no longer used by exfat_write_begin() after commit(11a347fb6cef exfat: change to get file size from DataLength), drop it. Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 10 +++++----- fs/exfat/file.c | 8 ++------ fs/exfat/inode.c | 17 +++++------------ fs/exfat/namei.c | 1 - fs/exfat/super.c | 1 - 5 files changed, 12 insertions(+), 25 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index ecc5db952deb..77cc62b026d3 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -309,11 +309,6 @@ struct exfat_inode_info { /* for avoiding the race between alloc and free */ unsigned int cache_valid_id; - /* - * NOTE: i_size_ondisk is 64bits, so must hold ->inode_lock to access. - * physically allocated size. - */ - loff_t i_size_ondisk; /* block-aligned i_size (used in cont_write_begin) */ loff_t i_size_aligned; /* on-disk position of directory entry or 0 */ @@ -417,6 +412,11 @@ static inline bool is_valid_cluster(struct exfat_sb_info *sbi, return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters; } +static inline loff_t exfat_ondisk_size(const struct inode *inode) +{ + return ((loff_t)inode->i_blocks) << 9; +} + /* super.c */ int exfat_set_volume_dirty(struct super_block *sb); int exfat_clear_volume_dirty(struct super_block *sb); diff --git a/fs/exfat/file.c b/fs/exfat/file.c index e19469e88000..fd4a8a170c1f 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -29,7 +29,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) if (ret) return ret; - num_clusters = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi); new_num_clusters = EXFAT_B_TO_CLU_ROUND_UP(size, sbi); if (new_num_clusters == num_clusters) @@ -75,7 +75,6 @@ out: i_size_write(inode, size); ei->i_size_aligned = round_up(size, sb->s_blocksize); - ei->i_size_ondisk = ei->i_size_aligned; inode->i_blocks = round_up(size, sbi->cluster_size) >> 9; mark_inode_dirty(inode); @@ -159,7 +158,7 @@ int __exfat_truncate(struct inode *inode) exfat_set_volume_dirty(sb); num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); - num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + num_clusters_phys = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi); exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); @@ -270,9 +269,6 @@ write_size: aligned_size++; } - if (ei->i_size_ondisk > i_size_read(inode)) - ei->i_size_ondisk = aligned_size; - if (ei->i_size_aligned > i_size_read(inode)) ei->i_size_aligned = aligned_size; mutex_unlock(&sbi->s_lock); diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 05f0e07b01d0..ed737ef288d2 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -130,11 +130,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); unsigned int local_clu_offset = clu_offset; - unsigned int num_to_be_allocated = 0, num_clusters = 0; + unsigned int num_to_be_allocated = 0, num_clusters; - if (ei->i_size_ondisk > 0) - num_clusters = - EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi); if (clu_offset >= num_clusters) num_to_be_allocated = clu_offset - num_clusters + 1; @@ -268,10 +266,10 @@ static int exfat_map_new_buffer(struct exfat_inode_info *ei, set_buffer_new(bh); /* - * Adjust i_size_aligned if i_size_ondisk is bigger than it. + * Adjust i_size_aligned if ondisk_size is bigger than it. */ - if (ei->i_size_ondisk > ei->i_size_aligned) - ei->i_size_aligned = ei->i_size_ondisk; + if (exfat_ondisk_size(&ei->vfs_inode) > ei->i_size_aligned) + ei->i_size_aligned = exfat_ondisk_size(&ei->vfs_inode); return 0; } @@ -317,10 +315,6 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, max_blocks = min(mapped_blocks, max_blocks); pos = EXFAT_BLK_TO_B((iblock + 1), sb); - if ((create && iblock >= last_block) || buffer_delay(bh_result)) { - if (ei->i_size_ondisk < pos) - ei->i_size_ondisk = pos; - } map_bh(bh_result, sb, phys); if (buffer_delay(bh_result)) @@ -674,7 +668,6 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) } ei->i_size_aligned = size; - ei->i_size_ondisk = size; exfat_save_attr(inode, info->attr); diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 631ad9e8e32a..6313dee5c9bb 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -372,7 +372,6 @@ static int exfat_find_empty_entry(struct inode *inode, /* directory inode should be updated in here */ i_size_write(inode, size); - ei->i_size_ondisk += sbi->cluster_size; ei->i_size_aligned += sbi->cluster_size; ei->valid_size += sbi->cluster_size; ei->flags = p_dir->flags; diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 323ecebe6f0e..6516edff38fc 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -371,7 +371,6 @@ static int exfat_read_root(struct inode *inode) inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; ei->i_size_aligned = i_size_read(inode); - ei->i_size_ondisk = i_size_read(inode); exfat_save_attr(inode, EXFAT_ATTR_SUBDIR); ei->i_crtime = simple_inode_init_ts(inode); From 231eb762bbe8fa232e672e5a399eed16a0c0c45a Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 17 Sep 2024 14:58:56 +0900 Subject: [PATCH 2/7] exfat: do not fallback to buffered write After commit(11a347fb6cef exfat: change to get file size from DataLength), the remaining area or hole had been filled with zeros before calling exfat_direct_IO(), so there is no need to fallback to buffered write, and ->i_size_aligned is no longer needed, drop it. Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 2 -- fs/exfat/file.c | 11 ------- fs/exfat/inode.c | 74 +++++++++------------------------------------ fs/exfat/namei.c | 1 - fs/exfat/super.c | 1 - 5 files changed, 15 insertions(+), 74 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 77cc62b026d3..5be214288a05 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -309,8 +309,6 @@ struct exfat_inode_info { /* for avoiding the race between alloc and free */ unsigned int cache_valid_id; - /* block-aligned i_size (used in cont_write_begin) */ - loff_t i_size_aligned; /* on-disk position of directory entry or 0 */ loff_t i_pos; loff_t valid_size; diff --git a/fs/exfat/file.c b/fs/exfat/file.c index fd4a8a170c1f..860c1f3b2cd7 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -74,7 +74,6 @@ out: /* Expanded range not zeroed, do not update valid_size */ i_size_write(inode, size); - ei->i_size_aligned = round_up(size, sb->s_blocksize); inode->i_blocks = round_up(size, sbi->cluster_size) >> 9; mark_inode_dirty(inode); @@ -244,8 +243,6 @@ void exfat_truncate(struct inode *inode) struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); - unsigned int blocksize = i_blocksize(inode); - loff_t aligned_size; int err; mutex_lock(&sbi->s_lock); @@ -263,14 +260,6 @@ void exfat_truncate(struct inode *inode) inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; write_size: - aligned_size = i_size_read(inode); - if (aligned_size & (blocksize - 1)) { - aligned_size |= (blocksize - 1); - aligned_size++; - } - - if (ei->i_size_aligned > i_size_read(inode)) - ei->i_size_aligned = aligned_size; mutex_unlock(&sbi->s_lock); } diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index ed737ef288d2..e7a74a948be1 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -258,21 +258,6 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, return 0; } -static int exfat_map_new_buffer(struct exfat_inode_info *ei, - struct buffer_head *bh, loff_t pos) -{ - if (buffer_delay(bh) && pos > ei->i_size_aligned) - return -EIO; - set_buffer_new(bh); - - /* - * Adjust i_size_aligned if ondisk_size is bigger than it. - */ - if (exfat_ondisk_size(&ei->vfs_inode) > ei->i_size_aligned) - ei->i_size_aligned = exfat_ondisk_size(&ei->vfs_inode); - return 0; -} - static int exfat_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -286,7 +271,6 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, sector_t last_block; sector_t phys = 0; sector_t valid_blks; - loff_t pos; mutex_lock(&sbi->s_lock); last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size_read(inode), sb); @@ -314,8 +298,6 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, mapped_blocks = sbi->sect_per_clus - sec_offset; max_blocks = min(mapped_blocks, max_blocks); - pos = EXFAT_BLK_TO_B((iblock + 1), sb); - map_bh(bh_result, sb, phys); if (buffer_delay(bh_result)) clear_buffer_delay(bh_result); @@ -336,13 +318,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, } /* The area has not been written, map and mark as new. */ - err = exfat_map_new_buffer(ei, bh_result, pos); - if (err) { - exfat_fs_error(sb, - "requested for bmap out of range(pos : (%llu) > i_size_aligned(%llu)\n", - pos, ei->i_size_aligned); - goto unlock_ret; - } + set_buffer_new(bh_result); ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb); mark_inode_dirty(inode); @@ -365,7 +341,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, * The block has been partially written, * zero the unwritten part and map the block. */ - loff_t size, off; + loff_t size, off, pos; max_blocks = 1; @@ -376,7 +352,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, if (!bh_result->b_folio) goto done; - pos -= sb->s_blocksize; + pos = EXFAT_BLK_TO_B(iblock, sb); size = ei->valid_size - pos; off = pos & (PAGE_SIZE - 1); @@ -463,14 +439,6 @@ static int exfat_write_end(struct file *file, struct address_space *mapping, int err; err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); - - if (ei->i_size_aligned < i_size_read(inode)) { - exfat_fs_error(inode->i_sb, - "invalid size(size(%llu) > aligned(%llu)\n", - i_size_read(inode), ei->i_size_aligned); - return -EIO; - } - if (err < len) exfat_write_failed(mapping, pos+len); @@ -498,20 +466,6 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) int rw = iov_iter_rw(iter); ssize_t ret; - if (rw == WRITE) { - /* - * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), - * so we need to update the ->i_size_aligned to block boundary. - * - * But we must fill the remaining area or hole by nul for - * updating ->i_size_aligned - * - * Return 0, and fallback to normal buffered write. - */ - if (EXFAT_I(inode)->i_size_aligned < size) - return 0; - } - /* * Need to use the DIO_LOCKING for avoiding the race * condition of exfat_get_block() and ->truncate(). @@ -525,8 +479,18 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else size = pos + ret; - /* zero the unwritten part in the partially written block */ - if (rw == READ && pos < ei->valid_size && ei->valid_size < size) { + if (rw == WRITE) { + /* + * If the block had been partially written before this write, + * ->valid_size will not be updated in exfat_get_block(), + * update it here. + */ + if (ei->valid_size < size) { + ei->valid_size = size; + mark_inode_dirty(inode); + } + } else if (pos < ei->valid_size && ei->valid_size < size) { + /* zero the unwritten part in the partially written block */ iov_iter_revert(iter, size - ei->valid_size); iov_iter_zero(size - ei->valid_size, iter); } @@ -661,14 +625,6 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) i_size_write(inode, size); - /* ondisk and aligned size should be aligned with block size */ - if (size & (inode->i_sb->s_blocksize - 1)) { - size |= (inode->i_sb->s_blocksize - 1); - size++; - } - - ei->i_size_aligned = size; - exfat_save_attr(inode, info->attr); inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 6313dee5c9bb..3e6c789a72c4 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -372,7 +372,6 @@ static int exfat_find_empty_entry(struct inode *inode, /* directory inode should be updated in here */ i_size_write(inode, size); - ei->i_size_aligned += sbi->cluster_size; ei->valid_size += sbi->cluster_size; ei->flags = p_dir->flags; inode->i_blocks += sbi->cluster_size >> 9; diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 6516edff38fc..904583f87433 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -370,7 +370,6 @@ static int exfat_read_root(struct inode *inode) inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; - ei->i_size_aligned = i_size_read(inode); exfat_save_attr(inode, EXFAT_ATTR_SUBDIR); ei->i_crtime = simple_inode_init_ts(inode); From f761fcdd289d07e8547fef7ac76c3760fc7803f2 Mon Sep 17 00:00:00 2001 From: Dongliang Cui Date: Wed, 18 Sep 2024 07:40:05 +0900 Subject: [PATCH 3/7] exfat: Implement sops->shutdown and ioctl We found that when writing a large file through buffer write, if the disk is inaccessible, exFAT does not return an error normally, which leads to the writing process not stopping properly. To easily reproduce this issue, you can follow the steps below: 1. format a device to exFAT and then mount (with a full disk erase) 2. dd if=/dev/zero of=/exfat_mount/test.img bs=1M count=8192 3. eject the device You may find that the dd process does not stop immediately and may continue for a long time. The root cause of this issue is that during buffer write process, exFAT does not need to access the disk to look up directory entries or the FAT table (whereas FAT would do) every time data is written. Instead, exFAT simply marks the buffer as dirty and returns, delegating the writeback operation to the writeback process. If the disk cannot be accessed at this time, the error will only be returned to the writeback process, and the original process will not receive the error, so it cannot be returned to the user side. When the disk cannot be accessed normally, an error should be returned to stop the writing process. Implement sops->shutdown and ioctl to shut down the file system when underlying block device is marked dead. Signed-off-by: Dongliang Cui Signed-off-by: Zhiguo Niu Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 12 ++++++++++++ fs/exfat/file.c | 21 ++++++++++++++++++++ fs/exfat/inode.c | 9 +++++++++ fs/exfat/namei.c | 15 +++++++++++++++ fs/exfat/super.c | 39 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/exfat.h | 25 ++++++++++++++++++++++++ 6 files changed, 121 insertions(+) create mode 100644 include/uapi/linux/exfat.h diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 5be214288a05..3cdc1de362a9 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -10,6 +10,7 @@ #include #include #include +#include #define EXFAT_ROOT_INO 1 @@ -148,6 +149,9 @@ enum { #define DIR_CACHE_SIZE \ (DIV_ROUND_UP(EXFAT_DEN_TO_B(ES_MAX_ENTRY_NUM), SECTOR_SIZE) + 1) +/* Superblock flags */ +#define EXFAT_FLAGS_SHUTDOWN 1 + struct exfat_dentry_namebuf { char *lfn; int lfnbuf_len; /* usually MAX_UNINAME_BUF_SIZE */ @@ -267,6 +271,8 @@ struct exfat_sb_info { unsigned int clu_srch_ptr; /* cluster search pointer */ unsigned int used_clusters; /* number of used clusters */ + unsigned long s_exfat_flags; /* Exfat superblock flags */ + struct mutex s_lock; /* superblock lock */ struct mutex bitmap_lock; /* bitmap lock */ struct exfat_mount_options options; @@ -331,6 +337,11 @@ static inline struct exfat_inode_info *EXFAT_I(struct inode *inode) return container_of(inode, struct exfat_inode_info, vfs_inode); } +static inline int exfat_forced_shutdown(struct super_block *sb) +{ + return test_bit(EXFAT_FLAGS_SHUTDOWN, &EXFAT_SB(sb)->s_exfat_flags); +} + /* * If ->i_mode can't hold 0222 (i.e. ATTR_RO), we use ->i_attrs to * save ATTR_RO instead of ->i_mode. @@ -459,6 +470,7 @@ int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long exfat_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +int exfat_force_shutdown(struct super_block *sb, u32 flags); /* namei.c */ extern const struct dentry_operations exfat_dentry_ops; diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 860c1f3b2cd7..a945ede3bf97 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -287,6 +287,9 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, unsigned int ia_valid; int error; + if (unlikely(exfat_forced_shutdown(inode->i_sb))) + return -EIO; + if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > i_size_read(inode)) { error = exfat_cont_expand(inode, attr->ia_size); @@ -470,6 +473,19 @@ static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg) return 0; } +static int exfat_ioctl_shutdown(struct super_block *sb, unsigned long arg) +{ + u32 flags; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(flags, (__u32 __user *)arg)) + return -EFAULT; + + return exfat_force_shutdown(sb, flags); +} + long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -480,6 +496,8 @@ long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return exfat_ioctl_get_attributes(inode, user_attr); case FAT_IOCTL_SET_ATTRIBUTES: return exfat_ioctl_set_attributes(filp, user_attr); + case EXFAT_IOC_SHUTDOWN: + return exfat_ioctl_shutdown(inode->i_sb, arg); case FITRIM: return exfat_ioctl_fitrim(inode, arg); default: @@ -500,6 +518,9 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) struct inode *inode = filp->f_mapping->host; int err; + if (unlikely(exfat_forced_shutdown(inode->i_sb))) + return -EIO; + err = __generic_file_fsync(filp, start, end, datasync); if (err) return err; diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index e7a74a948be1..d724de8f57bf 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -102,6 +102,9 @@ int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; + if (unlikely(exfat_forced_shutdown(inode->i_sb))) + return -EIO; + mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); ret = __exfat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); @@ -402,6 +405,9 @@ static void exfat_readahead(struct readahead_control *rac) static int exfat_writepages(struct address_space *mapping, struct writeback_control *wbc) { + if (unlikely(exfat_forced_shutdown(mapping->host->i_sb))) + return -EIO; + return mpage_writepages(mapping, wbc, exfat_get_block); } @@ -422,6 +428,9 @@ static int exfat_write_begin(struct file *file, struct address_space *mapping, { int ret; + if (unlikely(exfat_forced_shutdown(mapping->host->i_sb))) + return -EIO; + ret = block_write_begin(mapping, pos, len, foliop, exfat_get_block); if (ret < 0) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 3e6c789a72c4..2c4c44229352 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -547,6 +547,9 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir, int err; loff_t size = i_size_read(dir); + if (unlikely(exfat_forced_shutdown(sb))) + return -EIO; + mutex_lock(&EXFAT_SB(sb)->s_lock); exfat_set_volume_dirty(sb); err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_FILE, @@ -770,6 +773,9 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) struct exfat_entry_set_cache es; int entry, err = 0; + if (unlikely(exfat_forced_shutdown(sb))) + return -EIO; + mutex_lock(&EXFAT_SB(sb)->s_lock); exfat_chain_dup(&cdir, &ei->dir); entry = ei->entry; @@ -823,6 +829,9 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir, int err; loff_t size = i_size_read(dir); + if (unlikely(exfat_forced_shutdown(sb))) + return -EIO; + mutex_lock(&EXFAT_SB(sb)->s_lock); exfat_set_volume_dirty(sb); err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_DIR, @@ -913,6 +922,9 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) struct exfat_entry_set_cache es; int entry, err; + if (unlikely(exfat_forced_shutdown(sb))) + return -EIO; + mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); exfat_chain_dup(&cdir, &ei->dir); @@ -980,6 +992,9 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, struct exfat_entry_set_cache old_es, new_es; int sync = IS_DIRSYNC(inode); + if (unlikely(exfat_forced_shutdown(sb))) + return -EIO; + num_new_entries = exfat_calc_num_entries(p_uniname); if (num_new_entries < 0) return num_new_entries; diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 904583f87433..bd57844414aa 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -46,6 +46,9 @@ static int exfat_sync_fs(struct super_block *sb, int wait) struct exfat_sb_info *sbi = EXFAT_SB(sb); int err = 0; + if (unlikely(exfat_forced_shutdown(sb))) + return 0; + if (!wait) return 0; @@ -167,6 +170,41 @@ static int exfat_show_options(struct seq_file *m, struct dentry *root) return 0; } +int exfat_force_shutdown(struct super_block *sb, u32 flags) +{ + int ret; + struct exfat_sb_info *sbi = sb->s_fs_info; + struct exfat_mount_options *opts = &sbi->options; + + if (exfat_forced_shutdown(sb)) + return 0; + + switch (flags) { + case EXFAT_GOING_DOWN_DEFAULT: + case EXFAT_GOING_DOWN_FULLSYNC: + ret = bdev_freeze(sb->s_bdev); + if (ret) + return ret; + bdev_thaw(sb->s_bdev); + set_bit(EXFAT_FLAGS_SHUTDOWN, &sbi->s_exfat_flags); + break; + case EXFAT_GOING_DOWN_NOSYNC: + set_bit(EXFAT_FLAGS_SHUTDOWN, &sbi->s_exfat_flags); + break; + default: + return -EINVAL; + } + + if (opts->discard) + opts->discard = 0; + return 0; +} + +static void exfat_shutdown(struct super_block *sb) +{ + exfat_force_shutdown(sb, EXFAT_GOING_DOWN_NOSYNC); +} + static struct inode *exfat_alloc_inode(struct super_block *sb) { struct exfat_inode_info *ei; @@ -193,6 +231,7 @@ static const struct super_operations exfat_sops = { .sync_fs = exfat_sync_fs, .statfs = exfat_statfs, .show_options = exfat_show_options, + .shutdown = exfat_shutdown, }; enum { diff --git a/include/uapi/linux/exfat.h b/include/uapi/linux/exfat.h new file mode 100644 index 000000000000..46d95b16fc4b --- /dev/null +++ b/include/uapi/linux/exfat.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2024 Unisoc Technologies Co., Ltd. + */ + +#ifndef _UAPI_LINUX_EXFAT_H +#define _UAPI_LINUX_EXFAT_H +#include +#include + +/* + * exfat-specific ioctl commands + */ + +#define EXFAT_IOC_SHUTDOWN _IOR('X', 125, __u32) + +/* + * Flags used by EXFAT_IOC_SHUTDOWN + */ + +#define EXFAT_GOING_DOWN_DEFAULT 0x0 /* default with full sync */ +#define EXFAT_GOING_DOWN_FULLSYNC 0x1 /* going down with full sync*/ +#define EXFAT_GOING_DOWN_NOSYNC 0x2 /* going down */ + +#endif /* _UAPI_LINUX_EXFAT_H */ From d2b537b3e533f28e0d97293fe9293161fe8cd137 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 3 Sep 2024 15:01:09 +0800 Subject: [PATCH 4/7] exfat: fix memory leak in exfat_load_bitmap() If the first directory entry in the root directory is not a bitmap directory entry, 'bh' will not be released and reassigned, which will cause a memory leak. Fixes: 1e49a94cf707 ("exfat: add bitmap operations") Cc: stable@vger.kernel.org Signed-off-by: Yuezhang Mo Reviewed-by: Aoyama Wataru Signed-off-by: Namjae Jeon --- fs/exfat/balloc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 0356c88252bd..ce9be95c9172 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -91,11 +91,8 @@ int exfat_load_bitmap(struct super_block *sb) return -EIO; type = exfat_get_entry_type(ep); - if (type == TYPE_UNUSED) - break; - if (type != TYPE_BITMAP) - continue; - if (ep->dentry.bitmap.flags == 0x0) { + if (type == TYPE_BITMAP && + ep->dentry.bitmap.flags == 0x0) { int err; err = exfat_allocate_bitmap(sb, ep); @@ -103,6 +100,9 @@ int exfat_load_bitmap(struct super_block *sb) return err; } brelse(bh); + + if (type == TYPE_UNUSED) + return -EINVAL; } if (exfat_get_next_cluster(sb, &clu.dir)) From 6630ea49103c3d45461e29b0f6eb0ce750aeb8f5 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 23 Sep 2024 21:37:32 +0900 Subject: [PATCH 5/7] exfat: move extend valid_size into ->page_mkwrite() It is not a good way to extend valid_size to the end of the mmap area by writing zeros in mmap. Because after calling mmap, no data may be written, or only a small amount of data may be written to the head of the mmap area. This commit moves extending valid_size to exfat_page_mkwrite(). In exfat_page_mkwrite() only extend valid_size to the starting position of new data writing, which reduces unnecessary writing of zeros. If the block is not mapped and is marked as new after being mapped for writing, block_write_begin() will zero the page cache corresponding to the block, so there is no need to call zero_user_segment() in exfat_file_zeroed_range(). And after moving extending valid_size to exfat_page_mkwrite(), the data written by mmap will be copied to the page cache but the page cache may be not mapped to the disk. Calling zero_user_segment() will cause the data written by mmap to be cleared. So this commit removes calling zero_user_segment() from exfat_file_zeroed_range() and renames exfat_file_zeroed_range() to exfat_extend_valid_size(). Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/file.c | 70 +++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/fs/exfat/file.c b/fs/exfat/file.c index a945ede3bf97..a25d7eb789f4 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -532,32 +532,32 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) return blkdev_issue_flush(inode->i_sb->s_bdev); } -static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end) +static int exfat_extend_valid_size(struct file *file, loff_t new_valid_size) { int err; + loff_t pos; struct inode *inode = file_inode(file); + struct exfat_inode_info *ei = EXFAT_I(inode); struct address_space *mapping = inode->i_mapping; const struct address_space_operations *ops = mapping->a_ops; - while (start < end) { - u32 zerofrom, len; + pos = ei->valid_size; + while (pos < new_valid_size) { + u32 len; struct folio *folio; - zerofrom = start & (PAGE_SIZE - 1); - len = PAGE_SIZE - zerofrom; - if (start + len > end) - len = end - start; + len = PAGE_SIZE - (pos & (PAGE_SIZE - 1)); + if (pos + len > new_valid_size) + len = new_valid_size - pos; - err = ops->write_begin(file, mapping, start, len, &folio, NULL); + err = ops->write_begin(file, mapping, pos, len, &folio, NULL); if (err) goto out; - folio_zero_range(folio, offset_in_folio(folio, start), len); - - err = ops->write_end(file, mapping, start, len, len, folio, NULL); + err = ops->write_end(file, mapping, pos, len, len, folio, NULL); if (err < 0) goto out; - start += len; + pos += len; balance_dirty_pages_ratelimited(mapping); cond_resched(); @@ -585,7 +585,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto unlock; if (pos > valid_size) { - ret = exfat_file_zeroed_range(file, valid_size, pos); + ret = exfat_extend_valid_size(file, pos); if (ret < 0 && ret != -ENOSPC) { exfat_err(inode->i_sb, "write: fail to zero from %llu to %llu(%zd)", @@ -619,26 +619,46 @@ unlock: return ret; } -static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma) +static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf) { - int ret; + int err; + struct vm_area_struct *vma = vmf->vma; + struct file *file = vma->vm_file; struct inode *inode = file_inode(file); struct exfat_inode_info *ei = EXFAT_I(inode); - loff_t start = ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - loff_t end = min_t(loff_t, i_size_read(inode), + loff_t start, end; + + if (!inode_trylock(inode)) + return VM_FAULT_RETRY; + + start = ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + end = min_t(loff_t, i_size_read(inode), start + vma->vm_end - vma->vm_start); - if ((vma->vm_flags & VM_WRITE) && ei->valid_size < end) { - ret = exfat_file_zeroed_range(file, ei->valid_size, end); - if (ret < 0) { - exfat_err(inode->i_sb, - "mmap: fail to zero from %llu to %llu(%d)", - start, end, ret); - return ret; + if (ei->valid_size < end) { + err = exfat_extend_valid_size(file, end); + if (err < 0) { + inode_unlock(inode); + return vmf_fs_error(err); } } - return generic_file_mmap(file, vma); + inode_unlock(inode); + + return filemap_page_mkwrite(vmf); +} + +static const struct vm_operations_struct exfat_file_vm_ops = { + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = exfat_page_mkwrite, +}; + +static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); + vma->vm_ops = &exfat_file_vm_ops; + return 0; } const struct file_operations exfat_file_operations = { From c290fe508eee36df1640c3cb35dc8f89e073c8a8 Mon Sep 17 00:00:00 2001 From: Daniel Yang Date: Mon, 16 Sep 2024 16:05:06 -0700 Subject: [PATCH 6/7] exfat: resolve memory leak from exfat_create_upcase_table() If exfat_load_upcase_table reaches end and returns -EINVAL, allocated memory doesn't get freed and while exfat_load_default_upcase_table allocates more memory, leading to a memory leak. Here's link to syzkaller crash report illustrating this issue: https://syzkaller.appspot.com/text?tag=CrashReport&x=1406c201980000 Reported-by: syzbot+e1c69cadec0f1a078e3d@syzkaller.appspotmail.com Fixes: a13d1a4de3b0 ("exfat: move freeing sbi, upcase table and dropping nls into rcu-delayed helper") Cc: stable@vger.kernel.org Signed-off-by: Daniel Yang Signed-off-by: Namjae Jeon --- fs/exfat/nls.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index afdf13c34ff5..1ac011088ce7 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -779,8 +779,11 @@ int exfat_create_upcase_table(struct super_block *sb) le32_to_cpu(ep->dentry.upcase.checksum)); brelse(bh); - if (ret && ret != -EIO) + if (ret && ret != -EIO) { + /* free memory from exfat_load_upcase_table call */ + exfat_free_upcase_table(sbi); goto load_default; + } /* load successfully */ return ret; From cb7d85014fb1ca3387f7ff5f6067337b3d7f3c5a Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Sat, 14 Sep 2024 09:42:01 +0800 Subject: [PATCH 7/7] MAINTAINERS: exfat: add myself as reviewer I have been contributing to exfat for sometime and I would like to help with code reviews as well. Signed-off-by: Yuezhang Mo Acked-by: Sungjong Seo Signed-off-by: Namjae Jeon --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5432239f3c5d..508ade6eee78 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8415,6 +8415,7 @@ N: binfmt EXFAT FILE SYSTEM M: Namjae Jeon M: Sungjong Seo +R: Yuezhang Mo L: linux-fsdevel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/linkinjeon/exfat.git