linux/fs/xfs/xfs_icache.h
Dave Chinner ab23a77687 xfs: per-cpu deferred inode inactivation queues
Move inode inactivation to background work contexts so that it no
longer runs in the context that releases the final reference to an
inode. This will allow process work that ends up blocking on
inactivation to continue doing work while the filesytem processes
the inactivation in the background.

A typical demonstration of this is unlinking an inode with lots of
extents. The extents are removed during inactivation, so this blocks
the process that unlinked the inode from the directory structure. By
moving the inactivation to the background process, the userspace
applicaiton can keep working (e.g. unlinking the next inode in the
directory) while the inactivation work on the previous inode is
done by a different CPU.

The implementation of the queue is relatively simple. We use a
per-cpu lockless linked list (llist) to queue inodes for
inactivation without requiring serialisation mechanisms, and a work
item to allow the queue to be processed by a CPU bound worker
thread. We also keep a count of the queue depth so that we can
trigger work after a number of deferred inactivations have been
queued.

The use of a bound workqueue with a single work depth allows the
workqueue to run one work item per CPU. We queue the work item on
the CPU we are currently running on, and so this essentially gives
us affine per-cpu worker threads for the per-cpu queues. THis
maintains the effective CPU affinity that occurs within XFS at the
AG level due to all objects in a directory being local to an AG.
Hence inactivation work tends to run on the same CPU that last
accessed all the objects that inactivation accesses and this
maintains hot CPU caches for unlink workloads.

A depth of 32 inodes was chosen to match the number of inodes in an
inode cluster buffer. This hopefully allows sequential
allocation/unlink behaviours to defering inactivation of all the
inodes in a single cluster buffer at a time, further helping
maintain hot CPU and buffer cache accesses while running
inactivations.

A hard per-cpu queue throttle of 256 inode has been set to avoid
runaway queuing when inodes that take a long to time inactivate are
being processed. For example, when unlinking inodes with large
numbers of extents that can take a lot of processing to free.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
[djwong: tweak comments and tracepoints, convert opflags to state bits]
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
2021-08-06 11:05:39 -07:00

84 lines
2.7 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
#ifndef XFS_SYNC_H
#define XFS_SYNC_H 1
struct xfs_mount;
struct xfs_perag;
struct xfs_icwalk {
__u32 icw_flags;
kuid_t icw_uid;
kgid_t icw_gid;
prid_t icw_prid;
__u64 icw_min_file_size;
long icw_scan_limit;
};
/* Flags that reflect xfs_fs_eofblocks functionality. */
#define XFS_ICWALK_FLAG_SYNC (1U << 0) /* sync/wait mode scan */
#define XFS_ICWALK_FLAG_UID (1U << 1) /* filter by uid */
#define XFS_ICWALK_FLAG_GID (1U << 2) /* filter by gid */
#define XFS_ICWALK_FLAG_PRID (1U << 3) /* filter by project id */
#define XFS_ICWALK_FLAG_MINFILESIZE (1U << 4) /* filter by min file size */
#define XFS_ICWALK_FLAGS_VALID (XFS_ICWALK_FLAG_SYNC | \
XFS_ICWALK_FLAG_UID | \
XFS_ICWALK_FLAG_GID | \
XFS_ICWALK_FLAG_PRID | \
XFS_ICWALK_FLAG_MINFILESIZE)
/*
* Flags for xfs_iget()
*/
#define XFS_IGET_CREATE 0x1
#define XFS_IGET_UNTRUSTED 0x2
#define XFS_IGET_DONTCACHE 0x4
#define XFS_IGET_INCORE 0x8 /* don't read from disk or reinit */
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
uint flags, uint lock_flags, xfs_inode_t **ipp);
/* recovery needs direct inode allocation capability */
struct xfs_inode * xfs_inode_alloc(struct xfs_mount *mp, xfs_ino_t ino);
void xfs_inode_free(struct xfs_inode *ip);
void xfs_reclaim_worker(struct work_struct *work);
void xfs_reclaim_inodes(struct xfs_mount *mp);
long xfs_reclaim_inodes_count(struct xfs_mount *mp);
long xfs_reclaim_inodes_nr(struct xfs_mount *mp, unsigned long nr_to_scan);
void xfs_inode_mark_reclaimable(struct xfs_inode *ip);
int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
struct xfs_dquot *gdqp, struct xfs_dquot *pdqp,
unsigned int iwalk_flags);
int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_set_cowblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
void xfs_blockgc_worker(struct work_struct *work);
int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_ino_t ino, bool *inuse);
void xfs_blockgc_stop(struct xfs_mount *mp);
void xfs_blockgc_start(struct xfs_mount *mp);
void xfs_inodegc_worker(struct work_struct *work);
void xfs_inodegc_flush(struct xfs_mount *mp);
void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp);
void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);
#endif