/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#ifndef __XFS_DEFER_H__
#define __XFS_DEFER_H__

struct xfs_btree_cur;
struct xfs_defer_op_type;
struct xfs_defer_capture;

/*
 * Save a log intent item and a list of extents, so that we can replay
 * whatever action had to happen to the extent list and file the log done
 * item.
 */
struct xfs_defer_pending {
	struct list_head		dfp_list;	/* pending items */
	struct list_head		dfp_work;	/* work items */
	struct xfs_log_item		*dfp_intent;	/* log intent item */
	struct xfs_log_item		*dfp_done;	/* log done item */
	const struct xfs_defer_op_type	*dfp_ops;
	unsigned int			dfp_count;	/* # extent items */
	unsigned int			dfp_flags;
};

/*
 * Create a log intent item for this deferred item, but don't actually finish
 * the work. Caller must clear this before the final transaction commit.
 */
#define XFS_DEFER_PAUSED	(1U << 0)

#define XFS_DEFER_PENDING_STRINGS \
	{ XFS_DEFER_PAUSED,	"paused" }

void xfs_defer_item_pause(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
void xfs_defer_item_unpause(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
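
/*
 * Example (illustrative sketch only, not taken from any in-tree caller):
 * pausing a pending item so that its log intent is created and relogged as a
 * hedge against crashes, while ->finish_item is held off until the caller
 * unpauses it.  "wi" and "my_defer_op_type" are hypothetical stand-ins.
 *
 *	struct xfs_defer_pending	*dfp;
 *
 *	dfp = xfs_defer_add(tp, &wi->wi_list, &my_defer_op_type);
 *	xfs_defer_item_pause(tp, dfp);
 *
 *	(commit or roll other work; the paused intent stays logged)
 *
 *	xfs_defer_item_unpause(tp, dfp);
 *	error = xfs_defer_finish(&tp);
 */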

struct xfs_defer_pending *xfs_defer_add(struct xfs_trans *tp, struct list_head *h,
		const struct xfs_defer_op_type *ops);
int xfs_defer_finish_noroll(struct xfs_trans **tp);
int xfs_defer_finish(struct xfs_trans **tp);
int xfs_defer_finish_one(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
void xfs_defer_cancel(struct xfs_trans *);
void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
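
/*
 * Example (illustrative sketch only): the usual deferral pattern implied by
 * the prototypes above.  A caller embeds a struct list_head in its own work
 * item, queues it with xfs_defer_add, and later finishes all queued work by
 * rolling the transaction.  "my_work_item" and "my_defer_op_type" are
 * hypothetical names.
 *
 *	struct my_work_item	*wi;
 *
 *	wi = kzalloc(sizeof(*wi), GFP_KERNEL);
 *	INIT_LIST_HEAD(&wi->wi_list);
 *	(describe the work in wi)
 *	xfs_defer_add(tp, &wi->wi_list, &my_defer_op_type);
 *
 *	error = xfs_defer_finish(&tp);	(rolls tp until all dfops are done)
 */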

/* Description of a deferred type. */
struct xfs_defer_op_type {
	const char		*name;
	unsigned int		max_items;
	struct xfs_log_item *(*create_intent)(struct xfs_trans *tp,
			struct list_head *items, unsigned int count, bool sort);
	void (*abort_intent)(struct xfs_log_item *intent);
	struct xfs_log_item *(*create_done)(struct xfs_trans *tp,
			struct xfs_log_item *intent, unsigned int count);
	int (*finish_item)(struct xfs_trans *tp, struct xfs_log_item *done,
			struct list_head *item, struct xfs_btree_cur **state);
	void (*finish_cleanup)(struct xfs_trans *tp,
			struct xfs_btree_cur *state, int error);
	void (*cancel_item)(struct list_head *item);
	int (*recover_work)(struct xfs_defer_pending *dfp,
			struct list_head *capture_list);
	struct xfs_log_item *(*relog_intent)(struct xfs_trans *tp,
			struct xfs_log_item *intent,
			struct xfs_log_item *done_item);
};
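
/*
 * Example (illustrative sketch only): the rough shape of a deferred op type
 * definition.  The real instances are declared below and defined in their own
 * source files; the callback names here are hypothetical.
 *
 *	const struct xfs_defer_op_type my_defer_op_type = {
 *		.name		= "my_work",
 *		.max_items	= 1,
 *		.create_intent	= my_create_intent,
 *		.abort_intent	= my_abort_intent,
 *		.create_done	= my_create_done,
 *		.finish_item	= my_finish_item,
 *		.cancel_item	= my_cancel_item,
 *		.recover_work	= my_recover_work,
 *		.relog_intent	= my_relog_intent,
 *	};
 */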

extern const struct xfs_defer_op_type xfs_bmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
extern const struct xfs_defer_op_type xfs_attr_defer_type;
extern const struct xfs_defer_op_type xfs_exchmaps_defer_type;

/*
 * Deferred operation item relogging limits.
 */

/*
 * Rename w/ parent pointers can require up to 5 inodes with deferred ops to
 * be joined to the transaction: src_dp, target_dp, src_ip, target_ip, and wip.
 * These inodes are locked in sorted order by their inode numbers
 */
#define XFS_DEFER_OPS_NR_INODES	5
#define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */

/* Resources that must be held across a transaction roll. */
struct xfs_defer_resources {
	/* held buffers */
	struct xfs_buf		*dr_bp[XFS_DEFER_OPS_NR_BUFS];

	/* inodes with no unlock flags */
	struct xfs_inode	*dr_ip[XFS_DEFER_OPS_NR_INODES];

	/* number of held buffers */
	unsigned short		dr_bufs;

	/* bitmap of ordered buffers */
	unsigned short		dr_ordered;

	/* number of held inodes */
	unsigned short		dr_inos;
};

/*
 * This structure enables a dfops user to detach the chain of deferred
 * operations from a transaction so that they can be continued later.
 */
struct xfs_defer_capture {
	/* List of other capture structures. */
	struct list_head	dfc_list;

	/* Deferred ops state saved from the transaction. */
	struct list_head	dfc_dfops;
	unsigned int		dfc_tpflags;

	/* Block reservations for the data and rt devices. */
	unsigned int		dfc_blkres;
	unsigned int		dfc_rtxres;

	/* Log reservation saved from the transaction. */
	unsigned int		dfc_logres;

	struct xfs_defer_resources	dfc_held;
};

/*
 * Functions to capture a chain of deferred operations and continue them later.
 * This doesn't normally happen except log recovery.
 */
int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
		struct list_head *capture_list);
void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
		struct xfs_defer_resources *dres);
void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
		struct xfs_defer_capture *d);
void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
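
/*
 * Example (illustrative sketch only): the log recovery flow these functions
 * support, per the struct xfs_defer_capture comment above.  While replaying
 * one recovered intent, any newly created dfops are captured; later each
 * capture is continued in its own transaction.  Error handling is omitted and
 * the transaction allocation step is paraphrased.
 *
 *	(while finishing one recovered intent item:)
 *	error = xfs_defer_ops_capture_and_commit(tp, capture_list);
 *
 *	(later, for each struct xfs_defer_capture *dfc on capture_list:)
 *	(allocate tp using the saved dfc_logres/dfc_blkres/dfc_rtxres)
 *	xfs_defer_ops_continue(dfc, tp, &dres);
 *	error = xfs_trans_commit(tp);
 *	xfs_defer_resources_rele(&dres);
 */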

void xfs_defer_start_recovery(struct xfs_log_item *lip,
		struct list_head *r_dfops, const struct xfs_defer_op_type *ops);
void xfs_defer_cancel_recovery(struct xfs_mount *mp,
		struct xfs_defer_pending *dfp);
int xfs_defer_finish_recovery(struct xfs_mount *mp,
		struct xfs_defer_pending *dfp, struct list_head *capture_list);

static inline void
xfs_defer_add_item(
	struct xfs_defer_pending	*dfp,
	struct list_head		*work)
{
	list_add_tail(work, &dfp->dfp_work);
	dfp->dfp_count++;
}
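
/*
 * Example (illustrative sketch only): a ->recover_work implementation
 * typically reconstructs its work items from the recovered intent and
 * attaches them to the pending item with xfs_defer_add_item before they are
 * finished.  "wi" is a hypothetical work item with an embedded list head.
 *
 *	wi = kzalloc(sizeof(*wi), GFP_KERNEL);
 *	INIT_LIST_HEAD(&wi->wi_list);
 *	(fill in wi from the recovered intent's log format)
 *	xfs_defer_add_item(dfp, &wi->wi_list);
 */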

int __init xfs_defer_init_item_caches(void);
void xfs_defer_destroy_item_caches(void);

void xfs_defer_add_barrier(struct xfs_trans *tp);

#endif /* __XFS_DEFER_H__ */