Merge branch 'guilt/xlog-write-rework' into xfs-5.19-for-next

This commit is contained in:
Dave Chinner 2022-04-21 16:45:52 +10:00
commit 463260d767
6 changed files with 462 additions and 596 deletions

View File

@ -69,7 +69,6 @@ static inline uint xlog_get_cycle(char *ptr)
/* Log Clients */
#define XFS_TRANSACTION 0x69
#define XFS_VOLUME 0x2
#define XFS_LOG 0xaa
#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */

File diff suppressed because it is too large Load Diff

View File

@ -21,42 +21,19 @@ struct xfs_log_vec {
#define XFS_LOG_VEC_ORDERED (-1)
static inline void *
xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
uint type)
{
struct xfs_log_iovec *vec = *vecp;
void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
uint type);
if (vec) {
ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
vec++;
} else {
vec = &lv->lv_iovecp[0];
}
vec->i_type = type;
vec->i_addr = lv->lv_buf + lv->lv_buf_len;
ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));
*vecp = vec;
return vec->i_addr;
}
/*
* We need to make sure the next buffer is naturally aligned for the biggest
* basic data type we put into it. We already accounted for this padding when
* sizing the buffer.
*
* However, this padding does not get written into the log, and hence we have to
* track the space used by the log vectors separately to prevent log space hangs
* due to inaccurate accounting (i.e. a leak) of the used log space through the
* CIL context ticket.
*/
static inline void
xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
{
lv->lv_buf_len += round_up(len, sizeof(uint64_t));
struct xlog_op_header *oph = vec->i_addr;
/* opheader tracks payload length, logvec tracks region length */
oph->oh_len = cpu_to_be32(len);
len += sizeof(struct xlog_op_header);
lv->lv_buf_len += len;
lv->lv_bytes += len;
vec->i_len = len;
}
@ -117,15 +94,11 @@ int xfs_log_mount_finish(struct xfs_mount *mp);
void xfs_log_mount_cancel(struct xfs_mount *);
xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
void xfs_log_space_wake(struct xfs_mount *mp);
int xfs_log_reserve(struct xfs_mount *mp,
int length,
int count,
struct xlog_ticket **ticket,
uint8_t clientid,
bool permanent);
int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
void xfs_log_unmount(struct xfs_mount *mp);
void xfs_log_space_wake(struct xfs_mount *mp);
int xfs_log_reserve(struct xfs_mount *mp, int length, int count,
struct xlog_ticket **ticket, bool permanent);
int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
void xfs_log_unmount(struct xfs_mount *mp);
bool xfs_log_writable(struct xfs_mount *mp);
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);

View File

@ -37,7 +37,7 @@ xlog_cil_ticket_alloc(
{
struct xlog_ticket *tic;
tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0);
tic = xlog_ticket_alloc(log, 0, 1, 0);
/*
* set the current reservation to zero so we know to steal the basic
@ -214,13 +214,20 @@ xlog_cil_alloc_shadow_bufs(
}
/*
* We 64-bit align the length of each iovec so that the start
* of the next one is naturally aligned. We'll need to
* account for that slack space here. Then round nbytes up
* to 64-bit alignment so that the initial buffer alignment is
* easy to calculate and verify.
* We 64-bit align the length of each iovec so that the start of
* the next one is naturally aligned. We'll need to account for
* that slack space here.
*
* We also add the xlog_op_header to each region when
* formatting, but that's not accounted to the size of the item
* at this point. Hence we'll need an addition number of bytes
* for each vector to hold an opheader.
*
* Then round nbytes up to 64-bit alignment so that the initial
* buffer alignment is easy to calculate and verify.
*/
nbytes += niovecs * sizeof(uint64_t);
nbytes += niovecs *
(sizeof(uint64_t) + sizeof(struct xlog_op_header));
nbytes = round_up(nbytes, sizeof(uint64_t));
/*
@ -277,22 +284,18 @@ xlog_cil_alloc_shadow_bufs(
/*
* Prepare the log item for insertion into the CIL. Calculate the difference in
* log space and vectors it will consume, and if it is a new item pin it as
* well.
* log space it will consume, and if it is a new item pin it as well.
*/
STATIC void
xfs_cil_prepare_item(
struct xlog *log,
struct xfs_log_vec *lv,
struct xfs_log_vec *old_lv,
int *diff_len,
int *diff_iovecs)
int *diff_len)
{
/* Account for the new LV being passed in */
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
*diff_len += lv->lv_bytes;
*diff_iovecs += lv->lv_niovecs;
}
/*
* If there is no old LV, this is the first time we've seen the item in
@ -309,7 +312,6 @@ xfs_cil_prepare_item(
ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
*diff_len -= old_lv->lv_bytes;
*diff_iovecs -= old_lv->lv_niovecs;
lv->lv_item->li_lv_shadow = old_lv;
}
@ -358,12 +360,10 @@ static void
xlog_cil_insert_format_items(
struct xlog *log,
struct xfs_trans *tp,
int *diff_len,
int *diff_iovecs)
int *diff_len)
{
struct xfs_log_item *lip;
/* Bail out if we didn't find a log item. */
if (list_empty(&tp->t_items)) {
ASSERT(0);
@ -406,7 +406,6 @@ xlog_cil_insert_format_items(
* set the item up as though it is a new insertion so
* that the space reservation accounting is correct.
*/
*diff_iovecs -= lv->lv_niovecs;
*diff_len -= lv->lv_bytes;
/* Ensure the lv is set up according to ->iop_size */
@ -431,7 +430,7 @@ xlog_cil_insert_format_items(
ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
lip->li_ops->iop_format(lip, lv);
insert:
xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
xfs_cil_prepare_item(log, lv, old_lv, diff_len);
}
}
@ -451,7 +450,6 @@ xlog_cil_insert_items(
struct xfs_cil_ctx *ctx = cil->xc_ctx;
struct xfs_log_item *lip;
int len = 0;
int diff_iovecs = 0;
int iclog_space;
int iovhdr_res = 0, split_res = 0, ctx_res = 0;
@ -461,15 +459,10 @@ xlog_cil_insert_items(
* We can do this safely because the context can't checkpoint until we
* are done so it doesn't matter exactly how we update the CIL.
*/
xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
xlog_cil_insert_format_items(log, tp, &len);
spin_lock(&cil->xc_cil_lock);
/* account for space used by new iovec headers */
iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
len += iovhdr_res;
ctx->nvecs += diff_iovecs;
/* attach the transaction to the CIL if it has any busy extents */
if (!list_empty(&tp->t_busy))
list_splice_init(&tp->t_busy, &ctx->busy_extents);
@ -822,7 +815,8 @@ restart:
static int
xlog_cil_write_chain(
struct xfs_cil_ctx *ctx,
struct xfs_log_vec *chain)
struct xfs_log_vec *chain,
uint32_t chain_len)
{
struct xlog *log = ctx->cil->xc_log;
int error;
@ -830,7 +824,7 @@ xlog_cil_write_chain(
error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD);
if (error)
return error;
return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS);
return xlog_write(log, ctx, chain, ctx->ticket, chain_len);
}
/*
@ -844,9 +838,14 @@ xlog_cil_write_commit_record(
struct xfs_cil_ctx *ctx)
{
struct xlog *log = ctx->cil->xc_log;
struct xlog_op_header ophdr = {
.oh_clientid = XFS_TRANSACTION,
.oh_tid = cpu_to_be32(ctx->ticket->t_tid),
.oh_flags = XLOG_COMMIT_TRANS,
};
struct xfs_log_iovec reg = {
.i_addr = NULL,
.i_len = 0,
.i_addr = &ophdr,
.i_len = sizeof(struct xlog_op_header),
.i_type = XLOG_REG_TYPE_COMMIT,
};
struct xfs_log_vec vec = {
@ -862,12 +861,79 @@ xlog_cil_write_commit_record(
if (error)
return error;
error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS);
/* account for space used by record data */
ctx->ticket->t_curr_res -= reg.i_len;
error = xlog_write(log, ctx, &vec, ctx->ticket, reg.i_len);
if (error)
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
return error;
}
struct xlog_cil_trans_hdr {
struct xlog_op_header oph[2];
struct xfs_trans_header thdr;
struct xfs_log_iovec lhdr[2];
};
/*
* Build a checkpoint transaction header to begin the journal transaction. We
* need to account for the space used by the transaction header here as it is
* not accounted for in xlog_write().
*
* This is the only place we write a transaction header, so we also build the
* log opheaders that indicate the start of a log transaction and wrap the
* transaction header. We keep the start record in it's own log vector rather
* than compacting them into a single region as this ends up making the logic
* in xlog_write() for handling empty opheaders for start, commit and unmount
* records much simpler.
*/
static void
xlog_cil_build_trans_hdr(
struct xfs_cil_ctx *ctx,
struct xlog_cil_trans_hdr *hdr,
struct xfs_log_vec *lvhdr,
int num_iovecs)
{
struct xlog_ticket *tic = ctx->ticket;
__be32 tid = cpu_to_be32(tic->t_tid);
memset(hdr, 0, sizeof(*hdr));
/* Log start record */
hdr->oph[0].oh_tid = tid;
hdr->oph[0].oh_clientid = XFS_TRANSACTION;
hdr->oph[0].oh_flags = XLOG_START_TRANS;
/* log iovec region pointer */
hdr->lhdr[0].i_addr = &hdr->oph[0];
hdr->lhdr[0].i_len = sizeof(struct xlog_op_header);
hdr->lhdr[0].i_type = XLOG_REG_TYPE_LRHEADER;
/* log opheader */
hdr->oph[1].oh_tid = tid;
hdr->oph[1].oh_clientid = XFS_TRANSACTION;
hdr->oph[1].oh_len = cpu_to_be32(sizeof(struct xfs_trans_header));
/* transaction header in host byte order format */
hdr->thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
hdr->thdr.th_type = XFS_TRANS_CHECKPOINT;
hdr->thdr.th_tid = tic->t_tid;
hdr->thdr.th_num_items = num_iovecs;
/* log iovec region pointer */
hdr->lhdr[1].i_addr = &hdr->oph[1];
hdr->lhdr[1].i_len = sizeof(struct xlog_op_header) +
sizeof(struct xfs_trans_header);
hdr->lhdr[1].i_type = XLOG_REG_TYPE_TRANSHDR;
lvhdr->lv_niovecs = 2;
lvhdr->lv_iovecp = &hdr->lhdr[0];
lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len;
lvhdr->lv_next = ctx->lv_chain;
tic->t_curr_res -= lvhdr->lv_bytes;
}
/*
* Push the Committed Item List to the log.
*
@ -892,11 +958,10 @@ xlog_cil_push_work(
struct xlog *log = cil->xc_log;
struct xfs_log_vec *lv;
struct xfs_cil_ctx *new_ctx;
struct xlog_ticket *tic;
int num_iovecs;
int num_iovecs = 0;
int num_bytes = 0;
int error = 0;
struct xfs_trans_header thdr;
struct xfs_log_iovec lhdr;
struct xlog_cil_trans_hdr thdr;
struct xfs_log_vec lvhdr = { NULL };
xfs_csn_t push_seq;
bool push_commit_stable;
@ -975,7 +1040,6 @@ xlog_cil_push_work(
* by the flush lock.
*/
lv = NULL;
num_iovecs = 0;
while (!list_empty(&cil->xc_cil)) {
struct xfs_log_item *item;
@ -989,6 +1053,10 @@ xlog_cil_push_work(
lv = item->li_lv;
item->li_lv = NULL;
num_iovecs += lv->lv_niovecs;
/* we don't write ordered log vectors */
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
num_bytes += lv->lv_bytes;
}
/*
@ -1025,26 +1093,11 @@ xlog_cil_push_work(
* Build a checkpoint transaction header and write it to the log to
* begin the transaction. We need to account for the space used by the
* transaction header here as it is not accounted for in xlog_write().
*
* The LSN we need to pass to the log items on transaction commit is
* the LSN reported by the first log vector write. If we use the commit
* record lsn then we can move the tail beyond the grant write head.
*/
tic = ctx->ticket;
thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
thdr.th_type = XFS_TRANS_CHECKPOINT;
thdr.th_tid = tic->t_tid;
thdr.th_num_items = num_iovecs;
lhdr.i_addr = &thdr;
lhdr.i_len = sizeof(xfs_trans_header_t);
lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
xlog_cil_build_trans_hdr(ctx, &thdr, &lvhdr, num_iovecs);
num_bytes += lvhdr.lv_bytes;
lvhdr.lv_niovecs = 1;
lvhdr.lv_iovecp = &lhdr;
lvhdr.lv_next = ctx->lv_chain;
error = xlog_cil_write_chain(ctx, &lvhdr);
error = xlog_cil_write_chain(ctx, &lvhdr, num_bytes);
if (error)
goto out_abort_free_ticket;
@ -1052,7 +1105,7 @@ xlog_cil_push_work(
if (error)
goto out_abort_free_ticket;
xfs_log_ticket_ungrant(log, tic);
xfs_log_ticket_ungrant(log, ctx->ticket);
/*
* If the checkpoint spans multiple iclogs, wait for all previous iclogs
@ -1116,7 +1169,7 @@ out_skip:
return;
out_abort_free_ticket:
xfs_log_ticket_ungrant(log, tic);
xfs_log_ticket_ungrant(log, ctx->ticket);
ASSERT(xlog_is_shutdown(log));
if (!ctx->commit_iclog) {
xlog_cil_committed(ctx);

View File

@ -142,19 +142,6 @@ enum xlog_iclog_state {
#define XLOG_COVER_OPS 5
/* Ticket reservation region accounting */
#define XLOG_TIC_LEN_MAX 15
/*
* Reservation region
* As would be stored in xfs_log_iovec but without the i_addr which
* we don't care about.
*/
typedef struct xlog_res {
uint r_len; /* region length :4 */
uint r_type; /* region's transaction type :4 */
} xlog_res_t;
typedef struct xlog_ticket {
struct list_head t_queue; /* reserve/write queue */
struct task_struct *t_task; /* task that owns this ticket */
@ -164,15 +151,7 @@ typedef struct xlog_ticket {
int t_unit_res; /* unit reservation in bytes : 4 */
char t_ocnt; /* original count : 1 */
char t_cnt; /* current count : 1 */
char t_clientid; /* who does this belong to; : 1 */
uint8_t t_flags; /* properties of reservation : 1 */
/* reservation array fields */
uint t_res_num; /* num in array : 4 */
uint t_res_num_ophdrs; /* num op hdrs : 4 */
uint t_res_arr_sum; /* array sum : 4 */
uint t_res_o_flow; /* sum overflow : 4 */
xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : 8 * 15 */
} xlog_ticket_t;
/*
@ -211,7 +190,7 @@ typedef struct xlog_in_core {
u32 ic_offset;
enum xlog_iclog_state ic_state;
unsigned int ic_flags;
char *ic_datap; /* pointer to iclog data */
void *ic_datap; /* pointer to iclog data */
struct list_head ic_callbacks;
/* reference counts need their own cacheline */
@ -242,7 +221,6 @@ struct xfs_cil_ctx {
xfs_lsn_t commit_lsn; /* chkpt commit record lsn */
struct xlog_in_core *commit_iclog;
struct xlog_ticket *ticket; /* chkpt ticket */
int nvecs; /* number of regions */
int space_used; /* aggregate size of regions */
struct list_head busy_extents; /* busy extents in chkpt */
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
@ -441,10 +419,6 @@ struct xlog {
struct xfs_kobj l_kobj;
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
void *l_iclog_bak[XLOG_MAX_ICLOGS];
#endif
/* log recovery lsn tracking (for buffer submission */
xfs_lsn_t l_recovery_lsn;
@ -509,27 +483,14 @@ extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
char *dp, int size);
extern struct kmem_cache *xfs_log_ticket_cache;
struct xlog_ticket *
xlog_ticket_alloc(
struct xlog *log,
int unit_bytes,
int count,
char client,
bool permanent);
static inline void
xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
{
*ptr += bytes;
*len -= bytes;
*off += bytes;
}
struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
int count, bool permanent);
void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
void xlog_print_trans(struct xfs_trans *);
int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
struct xfs_log_vec *log_vector, struct xlog_ticket *tic,
uint optype);
uint32_t len);
void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);

View File

@ -194,11 +194,9 @@ xfs_trans_reserve(
ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
error = xfs_log_regrant(mp, tp->t_ticket);
} else {
error = xfs_log_reserve(mp,
resp->tr_logres,
error = xfs_log_reserve(mp, resp->tr_logres,
resp->tr_logcount,
&tp->t_ticket, XFS_TRANSACTION,
permanent);
&tp->t_ticket, permanent);
}
if (error)