mirror of
https://github.com/torvalds/linux.git
synced 2024-11-29 23:51:37 +00:00
9b378f6ad4
The readdir implementation currently always processes up to the last index
it finds. This however can result in an infinite loop if the directory has
a large number of entries such that they won't all fit in the given buffer
passed to the readdir callback, that is, dir_emit() returns a non-zero
value. Because in that case readdir() will be called again and if in the
meantime new directory entries were added and we still can't put all the
remaining entries in the buffer, we keep repeating this over and over.

The following C program and test script reproduce the problem:

  $ cat /mnt/readdir_prog.c
  #include <sys/types.h>
  #include <dirent.h>
  #include <stdio.h>

  int main(int argc, char *argv[])
  {
    DIR *dir = opendir(".");
    struct dirent *dd;

    while ((dd = readdir(dir))) {
      printf("%s\n", dd->d_name);
      rename(dd->d_name, "TEMPFILE");
      rename("TEMPFILE", dd->d_name);
    }
    closedir(dir);
  }

  $ gcc -o /mnt/readdir_prog /mnt/readdir_prog.c

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi

  mkfs.btrfs -f $DEV &> /dev/null
  #mkfs.xfs -f $DEV &> /dev/null
  #mkfs.ext4 -F $DEV &> /dev/null

  mount $DEV $MNT

  mkdir $MNT/testdir
  for ((i = 1; i <= 2000; i++)); do
      echo -n > $MNT/testdir/file_$i
  done

  cd $MNT/testdir
  /mnt/readdir_prog

  cd /mnt

  umount $MNT

This behaviour is surprising to applications and it's unlike ext4, xfs,
tmpfs, vfat and other filesystems, which always finish. In this case where
new entries were added due to renames, some file names may be reported
more than once, but this varies according to each filesystem - for example
ext4 never reported the same file more than once while xfs reports the
first 13 file names twice.

So change our readdir implementation to track the last index number when
opendir() is called and then make readdir() never process beyond that
index number. This gives the same behaviour as ext4.
Reported-by: Rob Landley <rob@landley.net>
Link: https://lore.kernel.org/linux-btrfs/2c8c55ec-04c6-e0dc-9c5c-8c7924778c35@landley.net/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217681
CC: stable@vger.kernel.org # 6.4+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
178 lines
5.4 KiB
C
178 lines
5.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2011 Fujitsu. All rights reserved.
 * Written by Miao Xie <miaox@cn.fujitsu.com>
 */

#ifndef BTRFS_DELAYED_INODE_H
|
|
#define BTRFS_DELAYED_INODE_H
|
|
|
|
#include <linux/rbtree.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/list.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/atomic.h>
|
|
#include <linux/refcount.h>
|
|
#include "ctree.h"
|
|
|
|
/*
 * Type tag of a delayed item: either an insertion or a deletion.
 * Stored in the 8-bit 'type' bitfield of struct btrfs_delayed_item.
 */
enum btrfs_delayed_item_type {
	BTRFS_DELAYED_INSERTION_ITEM,
	BTRFS_DELAYED_DELETION_ITEM
};

struct btrfs_delayed_root {
|
|
spinlock_t lock;
|
|
struct list_head node_list;
|
|
/*
|
|
* Used for delayed nodes which is waiting to be dealt with by the
|
|
* worker. If the delayed node is inserted into the work queue, we
|
|
* drop it from this list.
|
|
*/
|
|
struct list_head prepare_list;
|
|
atomic_t items; /* for delayed items */
|
|
atomic_t items_seq; /* for delayed items */
|
|
int nodes; /* for delayed nodes */
|
|
wait_queue_head_t wait;
|
|
};
|
|
|
|
/*
 * Flag values for delayed nodes.
 * NOTE(review): the consecutive small values suggest these are bit numbers
 * for btrfs_delayed_node::flags rather than masks - confirm at the users.
 */
#define BTRFS_DELAYED_NODE_IN_LIST	0
#define BTRFS_DELAYED_NODE_INODE_DIRTY	1
#define BTRFS_DELAYED_NODE_DEL_IREF	2

struct btrfs_delayed_node {
|
|
u64 inode_id;
|
|
u64 bytes_reserved;
|
|
struct btrfs_root *root;
|
|
/* Used to add the node into the delayed root's node list. */
|
|
struct list_head n_list;
|
|
/*
|
|
* Used to add the node into the prepare list, the nodes in this list
|
|
* is waiting to be dealt with by the async worker.
|
|
*/
|
|
struct list_head p_list;
|
|
struct rb_root_cached ins_root;
|
|
struct rb_root_cached del_root;
|
|
struct mutex mutex;
|
|
struct btrfs_inode_item inode_item;
|
|
refcount_t refs;
|
|
u64 index_cnt;
|
|
unsigned long flags;
|
|
int count;
|
|
/*
|
|
* The size of the next batch of dir index items to insert (if this
|
|
* node is from a directory inode). Protected by @mutex.
|
|
*/
|
|
u32 curr_index_batch_size;
|
|
/*
|
|
* Number of leaves reserved for inserting dir index items (if this
|
|
* node belongs to a directory inode). This may be larger then the
|
|
* actual number of leaves we end up using. Protected by @mutex.
|
|
*/
|
|
u32 index_item_leaves;
|
|
};
|
|
|
|
struct btrfs_delayed_item {
|
|
struct rb_node rb_node;
|
|
/* Offset value of the corresponding dir index key. */
|
|
u64 index;
|
|
struct list_head tree_list; /* used for batch insert/delete items */
|
|
struct list_head readdir_list; /* used for readdir items */
|
|
/*
|
|
* Used when logging a directory.
|
|
* Insertions and deletions to this list are protected by the parent
|
|
* delayed node's mutex.
|
|
*/
|
|
struct list_head log_list;
|
|
u64 bytes_reserved;
|
|
struct btrfs_delayed_node *delayed_node;
|
|
refcount_t refs;
|
|
enum btrfs_delayed_item_type type:8;
|
|
/*
|
|
* Track if this delayed item was already logged.
|
|
* Protected by the mutex of the parent delayed inode.
|
|
*/
|
|
bool logged;
|
|
/* The maximum leaf size is 64K, so u16 is more than enough. */
|
|
u16 data_len;
|
|
char data[];
|
|
};
|
|
|
|
static inline void btrfs_init_delayed_root(
|
|
struct btrfs_delayed_root *delayed_root)
|
|
{
|
|
atomic_set(&delayed_root->items, 0);
|
|
atomic_set(&delayed_root->items_seq, 0);
|
|
delayed_root->nodes = 0;
|
|
spin_lock_init(&delayed_root->lock);
|
|
init_waitqueue_head(&delayed_root->wait);
|
|
INIT_LIST_HEAD(&delayed_root->node_list);
|
|
INIT_LIST_HEAD(&delayed_root->prepare_list);
|
|
}
|
|
|
|
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
|
const char *name, int name_len,
|
|
struct btrfs_inode *dir,
|
|
struct btrfs_disk_key *disk_key, u8 flags,
|
|
u64 index);
|
|
|
|
/*
 * NOTE(review): judging by the name, queues the deletion of the dir index
 * entry at @index in directory @dir - confirm in the implementation.
 */
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_inode *dir, u64 index);

/*
 * NOTE(review): presumably related to btrfs_delayed_node::index_cnt; the
 * meaning of the int return value is not visible here - confirm.
 */
int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);

/*
 * Run delayed items in the context of @trans.
 * NOTE(review): @nr presumably bounds how much work the _nr variant does -
 * confirm in the implementation.
 */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans);
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr);

/* NOTE(review): balancing policy is not visible in this header. */
void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info);

/* Per-inode commit and teardown of delayed state. */
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
				     struct btrfs_inode *inode);
/* Used for evicting the inode. */
void btrfs_remove_delayed_node(struct btrfs_inode *inode);
void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode);
int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode);


int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct btrfs_inode *inode);
|
|
int btrfs_fill_inode(struct inode *inode, u32 *rdev);
|
|
int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode);
|
|
|
|
/* Used for dropping a dead root. */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);

/* Used for cleaning up the transaction. */
void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);

/* Used for readdir() */
|
|
bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
|
u64 last_index,
|
|
struct list_head *ins_list,
|
|
struct list_head *del_list);
|
|
void btrfs_readdir_put_delayed_items(struct inode *inode,
|
|
struct list_head *ins_list,
|
|
struct list_head *del_list);
|
|
int btrfs_should_delete_dir_index(struct list_head *del_list,
|
|
u64 index);
|
|
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
|
struct list_head *ins_list);
|
|
|
|
/*
 * Used during directory logging.
 *
 * Same get/put shape as the readdir helpers, operating on a
 * struct btrfs_inode and caller-provided @ins_list and @del_list.
 */
void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
				 struct list_head *ins_list,
				 struct list_head *del_list);
void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
				 struct list_head *ins_list,
				 struct list_head *del_list);

/* for init */
|
|
int __init btrfs_delayed_inode_init(void);
|
|
void __cold btrfs_delayed_inode_exit(void);
|
|
|
|
/* For debugging. */
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info);

#endif
|