From 735fbf67df56f402e9baa079a5560ebe8fa049c1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:33:23 +1000 Subject: [PATCH 01/16] xfs: factor out the CIL transaction header building It is static code deep in the middle of the CIL push logic. Factor it out into a helper so that it is clear and easy to modify separately. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_cil.c | 61 ++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ba57323bfdce..93f966c191a3 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -868,6 +868,41 @@ xlog_cil_write_commit_record( return error; } +struct xlog_cil_trans_hdr { + struct xfs_trans_header thdr; + struct xfs_log_iovec lhdr; +}; + +/* + * Build a checkpoint transaction header to begin the journal transaction. We + * need to account for the space used by the transaction header here as it is + * not accounted for in xlog_write(). + */ +static void +xlog_cil_build_trans_hdr( + struct xfs_cil_ctx *ctx, + struct xlog_cil_trans_hdr *hdr, + struct xfs_log_vec *lvhdr, + int num_iovecs) +{ + struct xlog_ticket *tic = ctx->ticket; + + memset(hdr, 0, sizeof(*hdr)); + + hdr->thdr.th_magic = XFS_TRANS_HEADER_MAGIC; + hdr->thdr.th_type = XFS_TRANS_CHECKPOINT; + hdr->thdr.th_tid = tic->t_tid; + hdr->thdr.th_num_items = num_iovecs; + hdr->lhdr.i_addr = &hdr->thdr; + hdr->lhdr.i_len = sizeof(xfs_trans_header_t); + hdr->lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; + tic->t_curr_res -= hdr->lhdr.i_len + sizeof(struct xlog_op_header); + + lvhdr->lv_niovecs = 1; + lvhdr->lv_iovecp = &hdr->lhdr; + lvhdr->lv_next = ctx->lv_chain; +} + /* * Push the Committed Item List to the log. * @@ -892,11 +927,9 @@ xlog_cil_push_work( struct xlog *log = cil->xc_log; struct xfs_log_vec *lv; struct xfs_cil_ctx *new_ctx; - struct xlog_ticket *tic; int num_iovecs; int error = 0; - struct xfs_trans_header thdr; - struct xfs_log_iovec lhdr; + struct xlog_cil_trans_hdr thdr; struct xfs_log_vec lvhdr = { NULL }; xfs_csn_t push_seq; bool push_commit_stable; @@ -1025,24 +1058,8 @@ xlog_cil_push_work( * Build a checkpoint transaction header and write it to the log to * begin the transaction. We need to account for the space used by the * transaction header here as it is not accounted for in xlog_write(). - * - * The LSN we need to pass to the log items on transaction commit is - * the LSN reported by the first log vector write. If we use the commit - * record lsn then we can move the tail beyond the grant write head. */ - tic = ctx->ticket; - thdr.th_magic = XFS_TRANS_HEADER_MAGIC; - thdr.th_type = XFS_TRANS_CHECKPOINT; - thdr.th_tid = tic->t_tid; - thdr.th_num_items = num_iovecs; - lhdr.i_addr = &thdr; - lhdr.i_len = sizeof(xfs_trans_header_t); - lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; - tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t); - - lvhdr.lv_niovecs = 1; - lvhdr.lv_iovecp = &lhdr; - lvhdr.lv_next = ctx->lv_chain; + xlog_cil_build_trans_hdr(ctx, &thdr, &lvhdr, num_iovecs); error = xlog_cil_write_chain(ctx, &lvhdr); if (error) @@ -1052,7 +1069,7 @@ xlog_cil_push_work( if (error) goto out_abort_free_ticket; - xfs_log_ticket_ungrant(log, tic); + xfs_log_ticket_ungrant(log, ctx->ticket); /* * If the checkpoint spans multiple iclogs, wait for all previous iclogs @@ -1116,7 +1133,7 @@ out_skip: return; out_abort_free_ticket: - xfs_log_ticket_ungrant(log, tic); + xfs_log_ticket_ungrant(log, ctx->ticket); ASSERT(xlog_is_shutdown(log)); if (!ctx->commit_iclog) { xlog_cil_committed(ctx); From 6eaed95e21a0872692246e63cb45542d0f62c922 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:33:48 +1000 Subject: [PATCH 02/16] xfs: only CIL pushes require a start record So move the one-off start record writing in xlog_write() out into the static header that the CIL push builds to write into the log initially. This simplifes the xlog_write() logic a lot. pahole on x86-64 confirms that the xlog_cil_trans_hdr is correctly 32 bit aligned and packed for copying the log op and transaction headers directly into the log as a single log region copy. struct xlog_cil_trans_hdr { struct xlog_op_header oph[2]; /* 0 24 */ struct xfs_trans_header thdr; /* 24 16 */ struct xfs_log_iovec lhdr[2]; /* 40 32 */ /* size: 72, cachelines: 2, members: 3 */ /* last cacheline: 8 bytes */ }; A wart is needed to handle the fact that length of the region the opheader points to doesn't include the opheader length. hence if we embed the opheader, we have to substract the opheader length from the length written into the opheader by the generic copying code. This will eventually go away when everything is converted to embedded opheaders. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 90 ++++++++++++++++++++++---------------------- fs/xfs/xfs_log_cil.c | 43 +++++++++++++++++---- 2 files changed, 81 insertions(+), 52 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 499e15b24215..f6fa5426278b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2221,9 +2221,9 @@ xlog_print_trans( } /* - * Calculate the potential space needed by the log vector. We may need a start - * record, and each region gets its own struct xlog_op_header and may need to be - * double word aligned. + * Calculate the potential space needed by the log vector. If this is a start + * transaction, the caller has already accounted for both opheaders in the start + * transaction, so we don't need to account for them here. */ static int xlog_write_calc_vec_length( @@ -2236,9 +2236,6 @@ xlog_write_calc_vec_length( int len = 0; int i; - if (optype & XLOG_START_TRANS) - headers++; - for (lv = log_vector; lv; lv = lv->lv_next) { /* we don't write ordered log vectors */ if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) @@ -2254,24 +2251,20 @@ xlog_write_calc_vec_length( } } + /* Don't account for regions with embedded ophdrs */ + if (optype && headers > 0) { + if (optype & XLOG_START_TRANS) { + ASSERT(headers >= 2); + headers -= 2; + } + } + ticket->t_res_num_ophdrs += headers; len += headers * sizeof(struct xlog_op_header); return len; } -static void -xlog_write_start_rec( - struct xlog_op_header *ophdr, - struct xlog_ticket *ticket) -{ - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); - ophdr->oh_clientid = ticket->t_clientid; - ophdr->oh_len = 0; - ophdr->oh_flags = XLOG_START_TRANS; - ophdr->oh_res2 = 0; -} - static xlog_op_header_t * xlog_write_setup_ophdr( struct xlog *log, @@ -2467,9 +2460,11 @@ xlog_write( * If this is a commit or unmount transaction, we don't need a start * record to be written. We do, however, have to account for the * commit or unmount header that gets written. Hence we always have - * to account for an extra xlog_op_header here. + * to account for an extra xlog_op_header here for commit and unmount + * records. */ - ticket->t_curr_res -= sizeof(struct xlog_op_header); + if (optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) + ticket->t_curr_res -= sizeof(struct xlog_op_header); if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "ctx ticket reservation ran out. Need to up reservation"); @@ -2510,7 +2505,7 @@ xlog_write( int copy_len; int copy_off; bool ordered = false; - bool wrote_start_rec = false; + bool added_ophdr = false; /* ordered log vectors have no regions to write */ if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { @@ -2524,25 +2519,24 @@ xlog_write( ASSERT((unsigned long)ptr % sizeof(int32_t) == 0); /* - * Before we start formatting log vectors, we need to - * write a start record. Only do this for the first - * iclog we write to. + * The XLOG_START_TRANS has embedded ophdrs for the + * start record and transaction header. They will always + * be the first two regions in the lv chain. */ if (optype & XLOG_START_TRANS) { - xlog_write_start_rec(ptr, ticket); + ophdr = reg->i_addr; + if (index) + optype &= ~XLOG_START_TRANS; + } else { + ophdr = xlog_write_setup_ophdr(log, ptr, + ticket, optype); + if (!ophdr) + return -EIO; + xlog_write_adv_cnt(&ptr, &len, &log_offset, - sizeof(struct xlog_op_header)); - optype &= ~XLOG_START_TRANS; - wrote_start_rec = true; - } - - ophdr = xlog_write_setup_ophdr(log, ptr, ticket, optype); - if (!ophdr) - return -EIO; - - xlog_write_adv_cnt(&ptr, &len, &log_offset, sizeof(struct xlog_op_header)); - + added_ophdr = true; + } len += xlog_write_setup_copy(ticket, ophdr, iclog->ic_size-log_offset, reg->i_len, @@ -2551,13 +2545,22 @@ xlog_write( &partial_copy_len); xlog_verify_dest_ptr(log, ptr); + + /* + * Wart: need to update length in embedded ophdr not + * to include it's own length. + */ + if (!added_ophdr) { + ophdr->oh_len = cpu_to_be32(copy_len - + sizeof(struct xlog_op_header)); + } /* * Copy region. * - * Unmount records just log an opheader, so can have - * empty payloads with no data region to copy. Hence we - * only copy the payload if the vector says it has data - * to copy. + * Commit and unmount records just log an opheader, so + * we can have empty payloads with no data region to + * copy. Hence we only copy the payload if the vector + * says it has data to copy. */ ASSERT(copy_len >= 0); if (copy_len > 0) { @@ -2565,12 +2568,9 @@ xlog_write( xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); } - copy_len += sizeof(struct xlog_op_header); - record_cnt++; - if (wrote_start_rec) { + if (added_ophdr) copy_len += sizeof(struct xlog_op_header); - record_cnt++; - } + record_cnt++; data_cnt += contwr ? copy_len : 0; error = xlog_write_copy_finish(log, iclog, optype, diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 93f966c191a3..5ff046e82912 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -869,14 +869,22 @@ xlog_cil_write_commit_record( } struct xlog_cil_trans_hdr { + struct xlog_op_header oph[2]; struct xfs_trans_header thdr; - struct xfs_log_iovec lhdr; + struct xfs_log_iovec lhdr[2]; }; /* * Build a checkpoint transaction header to begin the journal transaction. We * need to account for the space used by the transaction header here as it is * not accounted for in xlog_write(). + * + * This is the only place we write a transaction header, so we also build the + * log opheaders that indicate the start of a log transaction and wrap the + * transaction header. We keep the start record in it's own log vector rather + * than compacting them into a single region as this ends up making the logic + * in xlog_write() for handling empty opheaders for start, commit and unmount + * records much simpler. */ static void xlog_cil_build_trans_hdr( @@ -886,20 +894,41 @@ xlog_cil_build_trans_hdr( int num_iovecs) { struct xlog_ticket *tic = ctx->ticket; + __be32 tid = cpu_to_be32(tic->t_tid); memset(hdr, 0, sizeof(*hdr)); + /* Log start record */ + hdr->oph[0].oh_tid = tid; + hdr->oph[0].oh_clientid = XFS_TRANSACTION; + hdr->oph[0].oh_flags = XLOG_START_TRANS; + + /* log iovec region pointer */ + hdr->lhdr[0].i_addr = &hdr->oph[0]; + hdr->lhdr[0].i_len = sizeof(struct xlog_op_header); + hdr->lhdr[0].i_type = XLOG_REG_TYPE_LRHEADER; + + /* log opheader */ + hdr->oph[1].oh_tid = tid; + hdr->oph[1].oh_clientid = XFS_TRANSACTION; + hdr->oph[1].oh_len = cpu_to_be32(sizeof(struct xfs_trans_header)); + + /* transaction header in host byte order format */ hdr->thdr.th_magic = XFS_TRANS_HEADER_MAGIC; hdr->thdr.th_type = XFS_TRANS_CHECKPOINT; hdr->thdr.th_tid = tic->t_tid; hdr->thdr.th_num_items = num_iovecs; - hdr->lhdr.i_addr = &hdr->thdr; - hdr->lhdr.i_len = sizeof(xfs_trans_header_t); - hdr->lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; - tic->t_curr_res -= hdr->lhdr.i_len + sizeof(struct xlog_op_header); - lvhdr->lv_niovecs = 1; - lvhdr->lv_iovecp = &hdr->lhdr; + /* log iovec region pointer */ + hdr->lhdr[1].i_addr = &hdr->oph[1]; + hdr->lhdr[1].i_len = sizeof(struct xlog_op_header) + + sizeof(struct xfs_trans_header); + hdr->lhdr[1].i_type = XLOG_REG_TYPE_TRANSHDR; + + tic->t_curr_res -= hdr->lhdr[0].i_len + hdr->lhdr[1].i_len; + + lvhdr->lv_niovecs = 2; + lvhdr->lv_iovecp = &hdr->lhdr[0]; lvhdr->lv_next = ctx->lv_chain; } From ffa04c1f2cb047d6a44c3570bfb6e1ca5ba7f489 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:34:04 +1000 Subject: [PATCH 03/16] xfs: embed the xlog_op_header in the unmount record Remove another case where xlog_write() has to prepend an opheader to a log transaction. The unmount record + ophdr is smaller than the minimum amount of space guaranteed to be free in an iclog (2 * sizeof(ophdr)) and so we don't have to care about an unmount record being split across 2 iclogs. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f6fa5426278b..00e9af9b246a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -901,12 +901,22 @@ xlog_write_unmount_record( struct xlog *log, struct xlog_ticket *ticket) { - struct xfs_unmount_log_format ulf = { - .magic = XLOG_UNMOUNT_TYPE, + struct { + struct xlog_op_header ophdr; + struct xfs_unmount_log_format ulf; + } unmount_rec = { + .ophdr = { + .oh_clientid = XFS_LOG, + .oh_tid = cpu_to_be32(ticket->t_tid), + .oh_flags = XLOG_UNMOUNT_TRANS, + }, + .ulf = { + .magic = XLOG_UNMOUNT_TYPE, + }, }; struct xfs_log_iovec reg = { - .i_addr = &ulf, - .i_len = sizeof(ulf), + .i_addr = &unmount_rec, + .i_len = sizeof(unmount_rec), .i_type = XLOG_REG_TYPE_UNMOUNT, }; struct xfs_log_vec vec = { @@ -914,8 +924,12 @@ xlog_write_unmount_record( .lv_iovecp = ®, }; + BUILD_BUG_ON((sizeof(struct xlog_op_header) + + sizeof(struct xfs_unmount_log_format)) != + sizeof(unmount_rec)); + /* account for space used by record data */ - ticket->t_curr_res -= sizeof(ulf); + ticket->t_curr_res -= sizeof(unmount_rec); return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS); } @@ -2253,6 +2267,8 @@ xlog_write_calc_vec_length( /* Don't account for regions with embedded ophdrs */ if (optype && headers > 0) { + if (optype & XLOG_UNMOUNT_TRANS) + headers--; if (optype & XLOG_START_TRANS) { ASSERT(headers >= 2); headers -= 2; @@ -2458,12 +2474,11 @@ xlog_write( /* * If this is a commit or unmount transaction, we don't need a start - * record to be written. We do, however, have to account for the - * commit or unmount header that gets written. Hence we always have - * to account for an extra xlog_op_header here for commit and unmount - * records. + * record to be written. We do, however, have to account for the commit + * header that gets written. Hence we always have to account for an + * extra xlog_op_header here for commit records. */ - if (optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) + if (optype & XLOG_COMMIT_TRANS) ticket->t_curr_res -= sizeof(struct xlog_op_header); if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, @@ -2527,6 +2542,8 @@ xlog_write( ophdr = reg->i_addr; if (index) optype &= ~XLOG_START_TRANS; + } else if (optype & XLOG_UNMOUNT_TRANS) { + ophdr = reg->i_addr; } else { ophdr = xlog_write_setup_ophdr(log, ptr, ticket, optype); @@ -2557,7 +2574,7 @@ xlog_write( /* * Copy region. * - * Commit and unmount records just log an opheader, so + * Commit records just log an opheader, so * we can have empty payloads with no data region to * copy. Hence we only copy the payload if the vector * says it has data to copy. From 54021b624261fe5b429d7ab4d081c3b2cca153a8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:34:15 +1000 Subject: [PATCH 04/16] xfs: embed the xlog_op_header in the commit record Remove the final case where xlog_write() has to prepend an opheader to a log transaction. Similar to the start record, the commit record is just an empty opheader with a XLOG_COMMIT_TRANS type, so we can just make this the payload for the region being passed to xlog_write() and remove the special handling in xlog_write() for the commit record. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 22 ++++++---------------- fs/xfs/xfs_log_cil.c | 11 +++++++++-- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 00e9af9b246a..c23a15c56ef1 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2267,11 +2267,10 @@ xlog_write_calc_vec_length( /* Don't account for regions with embedded ophdrs */ if (optype && headers > 0) { - if (optype & XLOG_UNMOUNT_TRANS) - headers--; + headers--; if (optype & XLOG_START_TRANS) { - ASSERT(headers >= 2); - headers -= 2; + ASSERT(headers >= 1); + headers--; } } @@ -2472,14 +2471,6 @@ xlog_write( int data_cnt = 0; int error = 0; - /* - * If this is a commit or unmount transaction, we don't need a start - * record to be written. We do, however, have to account for the commit - * header that gets written. Hence we always have to account for an - * extra xlog_op_header here for commit records. - */ - if (optype & XLOG_COMMIT_TRANS) - ticket->t_curr_res -= sizeof(struct xlog_op_header); if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "ctx ticket reservation ran out. Need to up reservation"); @@ -2536,14 +2527,13 @@ xlog_write( /* * The XLOG_START_TRANS has embedded ophdrs for the * start record and transaction header. They will always - * be the first two regions in the lv chain. + * be the first two regions in the lv chain. Commit and + * unmount records also have embedded ophdrs. */ - if (optype & XLOG_START_TRANS) { + if (optype) { ophdr = reg->i_addr; if (index) optype &= ~XLOG_START_TRANS; - } else if (optype & XLOG_UNMOUNT_TRANS) { - ophdr = reg->i_addr; } else { ophdr = xlog_write_setup_ophdr(log, ptr, ticket, optype); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5ff046e82912..53dc5add5359 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -844,9 +844,14 @@ xlog_cil_write_commit_record( struct xfs_cil_ctx *ctx) { struct xlog *log = ctx->cil->xc_log; + struct xlog_op_header ophdr = { + .oh_clientid = XFS_TRANSACTION, + .oh_tid = cpu_to_be32(ctx->ticket->t_tid), + .oh_flags = XLOG_COMMIT_TRANS, + }; struct xfs_log_iovec reg = { - .i_addr = NULL, - .i_len = 0, + .i_addr = &ophdr, + .i_len = sizeof(struct xlog_op_header), .i_type = XLOG_REG_TYPE_COMMIT, }; struct xfs_log_vec vec = { @@ -862,6 +867,8 @@ xlog_cil_write_commit_record( if (error) return error; + /* account for space used by record data */ + ctx->ticket->t_curr_res -= reg.i_len; error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS); if (error) xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); From c7610dceed39d978ef1ee0f2ab5a3c8d2d54d120 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:34:33 +1000 Subject: [PATCH 05/16] xfs: log tickets don't need log client id We currently set the log ticket client ID when we reserve a transaction. This client ID is only ever written to the log by a CIL checkpoint or unmount records, and so anything using a high level transaction allocated through xfs_trans_alloc() does not need a log ticket client ID to be set. For the CIL checkpoint, the client ID written to the journal is always XFS_TRANSACTION, and for the unmount record it is always XFS_LOG, and nothing else writes to the log. All of these operations tell xlog_write() exactly what they need to write to the log (the optype) and build their own opheaders for start, commit and unmount records. Hence we no longer need to set the client id in either the log ticket or the xfs_trans. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_log_format.h | 1 - fs/xfs/xfs_log.c | 47 ++++++---------------------------- fs/xfs/xfs_log.h | 14 ++++------ fs/xfs/xfs_log_cil.c | 2 +- fs/xfs/xfs_log_priv.h | 10 ++------ fs/xfs/xfs_trans.c | 6 ++--- 6 files changed, 18 insertions(+), 62 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index b322db523d65..2b89141ae81a 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -69,7 +69,6 @@ static inline uint xlog_get_cycle(char *ptr) /* Log Clients */ #define XFS_TRANSACTION 0x69 -#define XFS_VOLUME 0x2 #define XFS_LOG 0xaa #define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c23a15c56ef1..1f6e7092ec76 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -434,10 +434,9 @@ out_error: int xfs_log_reserve( struct xfs_mount *mp, - int unit_bytes, - int cnt, + int unit_bytes, + int cnt, struct xlog_ticket **ticp, - uint8_t client, bool permanent) { struct xlog *log = mp->m_log; @@ -445,15 +444,13 @@ xfs_log_reserve( int need_bytes; int error = 0; - ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); - if (xlog_is_shutdown(log)) return -EIO; XFS_STATS_INC(mp, xs_try_logspace); ASSERT(*ticp == NULL); - tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent); + tic = xlog_ticket_alloc(log, unit_bytes, cnt, permanent); *ticp = tic; xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt @@ -947,7 +944,7 @@ xlog_unmount_write( struct xlog_ticket *tic = NULL; int error; - error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); + error = xfs_log_reserve(mp, 600, 1, &tic, 0); if (error) goto out_err; @@ -2282,35 +2279,13 @@ xlog_write_calc_vec_length( static xlog_op_header_t * xlog_write_setup_ophdr( - struct xlog *log, struct xlog_op_header *ophdr, - struct xlog_ticket *ticket, - uint flags) + struct xlog_ticket *ticket) { ophdr->oh_tid = cpu_to_be32(ticket->t_tid); - ophdr->oh_clientid = ticket->t_clientid; + ophdr->oh_clientid = XFS_TRANSACTION; ophdr->oh_res2 = 0; - - /* are we copying a commit or unmount record? */ - ophdr->oh_flags = flags; - - /* - * We've seen logs corrupted with bad transaction client ids. This - * makes sure that XFS doesn't generate them on. Turn this into an EIO - * and shut down the filesystem. - */ - switch (ophdr->oh_clientid) { - case XFS_TRANSACTION: - case XFS_VOLUME: - case XFS_LOG: - break; - default: - xfs_warn(log->l_mp, - "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT, - ophdr->oh_clientid, ticket); - return NULL; - } - + ophdr->oh_flags = 0; return ophdr; } @@ -2535,11 +2510,7 @@ xlog_write( if (index) optype &= ~XLOG_START_TRANS; } else { - ophdr = xlog_write_setup_ophdr(log, ptr, - ticket, optype); - if (!ophdr) - return -EIO; - + ophdr = xlog_write_setup_ophdr(ptr, ticket); xlog_write_adv_cnt(&ptr, &len, &log_offset, sizeof(struct xlog_op_header)); added_ophdr = true; @@ -3598,7 +3569,6 @@ xlog_ticket_alloc( struct xlog *log, int unit_bytes, int cnt, - char client, bool permanent) { struct xlog_ticket *tic; @@ -3616,7 +3586,6 @@ xlog_ticket_alloc( tic->t_cnt = cnt; tic->t_ocnt = cnt; tic->t_tid = prandom_u32(); - tic->t_clientid = client; if (permanent) tic->t_flags |= XLOG_TIC_PERM_RESERV; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index dc1b77b92fc1..09b8fe9994f2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -117,15 +117,11 @@ int xfs_log_mount_finish(struct xfs_mount *mp); void xfs_log_mount_cancel(struct xfs_mount *); xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); -void xfs_log_space_wake(struct xfs_mount *mp); -int xfs_log_reserve(struct xfs_mount *mp, - int length, - int count, - struct xlog_ticket **ticket, - uint8_t clientid, - bool permanent); -int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); -void xfs_log_unmount(struct xfs_mount *mp); +void xfs_log_space_wake(struct xfs_mount *mp); +int xfs_log_reserve(struct xfs_mount *mp, int length, int count, + struct xlog_ticket **ticket, bool permanent); +int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); +void xfs_log_unmount(struct xfs_mount *mp); bool xfs_log_writable(struct xfs_mount *mp); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 53dc5add5359..2403a7bbb913 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -37,7 +37,7 @@ xlog_cil_ticket_alloc( { struct xlog_ticket *tic; - tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0); + tic = xlog_ticket_alloc(log, 0, 1, 0); /* * set the current reservation to zero so we know to steal the basic diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 401cdc400980..6f247561588b 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -164,7 +164,6 @@ typedef struct xlog_ticket { int t_unit_res; /* unit reservation in bytes : 4 */ char t_ocnt; /* original count : 1 */ char t_cnt; /* current count : 1 */ - char t_clientid; /* who does this belong to; : 1 */ char t_flags; /* properties of reservation : 1 */ /* reservation array fields */ @@ -509,13 +508,8 @@ extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, char *dp, int size); extern struct kmem_cache *xfs_log_ticket_cache; -struct xlog_ticket * -xlog_ticket_alloc( - struct xlog *log, - int unit_bytes, - int count, - char client, - bool permanent); +struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes, + int count, bool permanent); static inline void xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 0ac717aad380..8b9d36b19ae4 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -194,11 +194,9 @@ xfs_trans_reserve( ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES); error = xfs_log_regrant(mp, tp->t_ticket); } else { - error = xfs_log_reserve(mp, - resp->tr_logres, + error = xfs_log_reserve(mp, resp->tr_logres, resp->tr_logcount, - &tp->t_ticket, XFS_TRANSACTION, - permanent); + &tp->t_ticket, permanent); } if (error) From 3c352bef8335a8d9d8f14bc0bd533df023280a72 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:34:49 +1000 Subject: [PATCH 06/16] xfs: move log iovec alignment to preparation function To include log op headers directly into the log iovec regions that the ophdrs wrap, we need to move the buffer alignment code from xlog_finish_iovec() to xlog_prepare_iovec(). This is because the xlog_op_header is only 12 bytes long, and we need the buffer that the caller formats their data into to be 8 byte aligned. Hence once we start prepending the ophdr in xlog_prepare_iovec(), we are going to need to manage the padding directly to ensure that the buffer pointer returned is correctly aligned. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 09b8fe9994f2..d1fc43476166 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -21,6 +21,16 @@ struct xfs_log_vec { #define XFS_LOG_VEC_ORDERED (-1) +/* + * We need to make sure the buffer pointer returned is naturally aligned for the + * biggest basic data type we put into it. We have already accounted for this + * padding when sizing the buffer. + * + * However, this padding does not get written into the log, and hence we have to + * track the space used by the log vectors separately to prevent log space hangs + * due to inaccurate accounting (i.e. a leak) of the used log space through the + * CIL context ticket. + */ static inline void * xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, uint type) @@ -34,6 +44,9 @@ xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, vec = &lv->lv_iovecp[0]; } + if (!IS_ALIGNED(lv->lv_buf_len, sizeof(uint64_t))) + lv->lv_buf_len = round_up(lv->lv_buf_len, sizeof(uint64_t)); + vec->i_type = type; vec->i_addr = lv->lv_buf + lv->lv_buf_len; @@ -43,20 +56,10 @@ xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, return vec->i_addr; } -/* - * We need to make sure the next buffer is naturally aligned for the biggest - * basic data type we put into it. We already accounted for this padding when - * sizing the buffer. - * - * However, this padding does not get written into the log, and hence we have to - * track the space used by the log vectors separately to prevent log space hangs - * due to inaccurate accounting (i.e. a leak) of the used log space through the - * CIL context ticket. - */ static inline void xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) { - lv->lv_buf_len += round_up(len, sizeof(uint64_t)); + lv->lv_buf_len += len; lv->lv_bytes += len; vec->i_len = len; } From 8d547cf9d2392585204075243f29022a619550f2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:34:59 +1000 Subject: [PATCH 07/16] xfs: reserve space and initialise xlog_op_header in item formatting Current xlog_write() adds op headers to the log manually for every log item region that is in the vector passed to it. While xlog_write() needs to stamp the transaction ID into the ophdr, we already know it's length, flags, clientid, etc at CIL commit time. This means the only time that xlog write really needs to format and reserve space for a new ophdr is when a region is split across two iclogs. Adding the opheader and accounting for it as part of the normal formatted item region means we simplify the accounting of space used by a transaction and we don't have to special case reserving of space in for the ophdrs in xlog_write(). It also means we can largely initialise the ophdr in transaction commit instead of xlog_write, making the xlog_write formatting inner loop much tighter. xlog_prepare_iovec() is now too large to stay as an inline function, so we move it out of line and into xfs_log.c. Object sizes: text data bss dec hex filename 1125934 305951 484 1432369 15db31 fs/xfs/built-in.a.before 1123360 305951 484 1429795 15d123 fs/xfs/built-in.a.after So the code is a roughly 2.5kB smaller with xlog_prepare_iovec() now out of line, even though it grew in size itself. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 115 +++++++++++++++++++++++++++++-------------- fs/xfs/xfs_log.h | 42 +++------------- fs/xfs/xfs_log_cil.c | 25 +++++----- 3 files changed, 99 insertions(+), 83 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1f6e7092ec76..23ca073de6df 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -90,6 +90,62 @@ xlog_iclogs_empty( static int xfs_log_cover(struct xfs_mount *); +/* + * We need to make sure the buffer pointer returned is naturally aligned for the + * biggest basic data type we put into it. We have already accounted for this + * padding when sizing the buffer. + * + * However, this padding does not get written into the log, and hence we have to + * track the space used by the log vectors separately to prevent log space hangs + * due to inaccurate accounting (i.e. a leak) of the used log space through the + * CIL context ticket. + * + * We also add space for the xlog_op_header that describes this region in the + * log. This prepends the data region we return to the caller to copy their data + * into, so do all the static initialisation of the ophdr now. Because the ophdr + * is not 8 byte aligned, we have to be careful to ensure that we align the + * start of the buffer such that the region we return to the call is 8 byte + * aligned and packed against the tail of the ophdr. + */ +void * +xlog_prepare_iovec( + struct xfs_log_vec *lv, + struct xfs_log_iovec **vecp, + uint type) +{ + struct xfs_log_iovec *vec = *vecp; + struct xlog_op_header *oph; + uint32_t len; + void *buf; + + if (vec) { + ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); + vec++; + } else { + vec = &lv->lv_iovecp[0]; + } + + len = lv->lv_buf_len + sizeof(struct xlog_op_header); + if (!IS_ALIGNED(len, sizeof(uint64_t))) { + lv->lv_buf_len = round_up(len, sizeof(uint64_t)) - + sizeof(struct xlog_op_header); + } + + vec->i_type = type; + vec->i_addr = lv->lv_buf + lv->lv_buf_len; + + oph = vec->i_addr; + oph->oh_clientid = XFS_TRANSACTION; + oph->oh_res2 = 0; + oph->oh_flags = 0; + + buf = vec->i_addr + sizeof(struct xlog_op_header); + ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t))); + + *vecp = vec; + return buf; +} + static void xlog_grant_sub_space( struct xlog *log, @@ -2232,9 +2288,9 @@ xlog_print_trans( } /* - * Calculate the potential space needed by the log vector. If this is a start - * transaction, the caller has already accounted for both opheaders in the start - * transaction, so we don't need to account for them here. + * Calculate the potential space needed by the log vector. All regions contain + * their own opheaders and they are accounted for in region space so we don't + * need to add them to the vector length here. */ static int xlog_write_calc_vec_length( @@ -2261,18 +2317,7 @@ xlog_write_calc_vec_length( xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type); } } - - /* Don't account for regions with embedded ophdrs */ - if (optype && headers > 0) { - headers--; - if (optype & XLOG_START_TRANS) { - ASSERT(headers >= 1); - headers--; - } - } - ticket->t_res_num_ophdrs += headers; - len += headers * sizeof(struct xlog_op_header); return len; } @@ -2282,7 +2327,6 @@ xlog_write_setup_ophdr( struct xlog_op_header *ophdr, struct xlog_ticket *ticket) { - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); ophdr->oh_clientid = XFS_TRANSACTION; ophdr->oh_res2 = 0; ophdr->oh_flags = 0; @@ -2500,21 +2544,25 @@ xlog_write( ASSERT((unsigned long)ptr % sizeof(int32_t) == 0); /* - * The XLOG_START_TRANS has embedded ophdrs for the - * start record and transaction header. They will always - * be the first two regions in the lv chain. Commit and - * unmount records also have embedded ophdrs. + * Regions always have their ophdr at the start of the + * region, except for: + * - a transaction start which has a start record ophdr + * before the first region ophdr; and + * - the previous region didn't fully fit into an iclog + * so needs a continuation ophdr to prepend the region + * in this new iclog. */ - if (optype) { - ophdr = reg->i_addr; - if (index) - optype &= ~XLOG_START_TRANS; - } else { + ophdr = reg->i_addr; + if (optype && index) { + optype &= ~XLOG_START_TRANS; + } else if (partial_copy) { ophdr = xlog_write_setup_ophdr(ptr, ticket); xlog_write_adv_cnt(&ptr, &len, &log_offset, sizeof(struct xlog_op_header)); added_ophdr = true; } + ophdr->oh_tid = cpu_to_be32(ticket->t_tid); + len += xlog_write_setup_copy(ticket, ophdr, iclog->ic_size-log_offset, reg->i_len, @@ -2532,20 +2580,11 @@ xlog_write( ophdr->oh_len = cpu_to_be32(copy_len - sizeof(struct xlog_op_header)); } - /* - * Copy region. - * - * Commit records just log an opheader, so - * we can have empty payloads with no data region to - * copy. Hence we only copy the payload if the vector - * says it has data to copy. - */ - ASSERT(copy_len >= 0); - if (copy_len > 0) { - memcpy(ptr, reg->i_addr + copy_off, copy_len); - xlog_write_adv_cnt(&ptr, &len, &log_offset, - copy_len); - } + + ASSERT(copy_len > 0); + memcpy(ptr, reg->i_addr + copy_off, copy_len); + xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); + if (added_ophdr) copy_len += sizeof(struct xlog_op_header); record_cnt++; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d1fc43476166..816f44d7dc81 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -21,44 +21,18 @@ struct xfs_log_vec { #define XFS_LOG_VEC_ORDERED (-1) -/* - * We need to make sure the buffer pointer returned is naturally aligned for the - * biggest basic data type we put into it. We have already accounted for this - * padding when sizing the buffer. - * - * However, this padding does not get written into the log, and hence we have to - * track the space used by the log vectors separately to prevent log space hangs - * due to inaccurate accounting (i.e. a leak) of the used log space through the - * CIL context ticket. - */ -static inline void * -xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, - uint type) -{ - struct xfs_log_iovec *vec = *vecp; - - if (vec) { - ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); - vec++; - } else { - vec = &lv->lv_iovecp[0]; - } - - if (!IS_ALIGNED(lv->lv_buf_len, sizeof(uint64_t))) - lv->lv_buf_len = round_up(lv->lv_buf_len, sizeof(uint64_t)); - - vec->i_type = type; - vec->i_addr = lv->lv_buf + lv->lv_buf_len; - - ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t))); - - *vecp = vec; - return vec->i_addr; -} +void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, + uint type); static inline void xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) { + struct xlog_op_header *oph = vec->i_addr; + + /* opheader tracks payload length, logvec tracks region length */ + oph->oh_len = cpu_to_be32(len); + + len += sizeof(struct xlog_op_header); lv->lv_buf_len += len; lv->lv_bytes += len; vec->i_len = len; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 2403a7bbb913..5ccbb6bd4655 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -214,13 +214,20 @@ xlog_cil_alloc_shadow_bufs( } /* - * We 64-bit align the length of each iovec so that the start - * of the next one is naturally aligned. We'll need to - * account for that slack space here. Then round nbytes up - * to 64-bit alignment so that the initial buffer alignment is - * easy to calculate and verify. + * We 64-bit align the length of each iovec so that the start of + * the next one is naturally aligned. We'll need to account for + * that slack space here. + * + * We also add the xlog_op_header to each region when + * formatting, but that's not accounted to the size of the item + * at this point. Hence we'll need an addition number of bytes + * for each vector to hold an opheader. + * + * Then round nbytes up to 64-bit alignment so that the initial + * buffer alignment is easy to calculate and verify. */ - nbytes += niovecs * sizeof(uint64_t); + nbytes += niovecs * + (sizeof(uint64_t) + sizeof(struct xlog_op_header)); nbytes = round_up(nbytes, sizeof(uint64_t)); /* @@ -465,11 +472,6 @@ xlog_cil_insert_items( spin_lock(&cil->xc_cil_lock); - /* account for space used by new iovec headers */ - iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t); - len += iovhdr_res; - ctx->nvecs += diff_iovecs; - /* attach the transaction to the CIL if it has any busy extents */ if (!list_empty(&tp->t_busy)) list_splice_init(&tp->t_busy, &ctx->busy_extents); @@ -501,6 +503,7 @@ xlog_cil_insert_items( } tp->t_ticket->t_curr_res -= len; ctx->space_used += len; + ctx->nvecs += diff_iovecs; /* * If we've overrun the reservation, dump the tx details before we move From c5141320c42b08b99b7c4b250ac9675d7c7ed3a7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:35:09 +1000 Subject: [PATCH 08/16] xfs: log ticket region debug is largely useless xlog_tic_add_region() is used to trace the regions being added to a log ticket to provide information in the situation where a ticket reservation overrun occurs. The information gathered is stored int the ticket, and dumped if xlog_print_tic_res() is called. For a front end struct xfs_trans overrun, the ticket only contains reservation tracking information - the ticket is never handed to the log so has no regions attached to it. The overrun debug information in this case comes from xlog_print_trans(), which walks the items attached to the transaction and dumps their attached formatted log vectors directly. It also dumps the ticket state, but that only contains reservation accounting and nothing else. Hence xlog_print_tic_res() never dumps region or overrun information from this path. xlog_tic_add_region() is actually called from xlog_write(), which means it is being used to track the regions seen in a CIL checkpoint log vector chain. In looking at CIL behaviour recently, I've seen 32MB checkpoints regularly exceed 250,000 regions in the LV chain. The log ticket debug code can track *15* regions. IOWs, if there is a ticket overrun in the CIL code, the ticket region tracking code is going to be completely useless for determining what went wrong. The only thing it can tell us is how much of an overrun occurred, and we really don't need extra debug information in the log ticket to tell us that. Indeed, the main place we call xlog_tic_add_region() is also adding up the number of regions and the space used so that xlog_write() knows how much will be written to the log. This is exactly the same information that log ticket is storing once we take away the useless region tracking array. Hence xlog_tic_add_region() is not useful, but can be called 250,000 times a CIL push... Just strip all that debug "information" out of the of the log ticket and only have it report reservation space information when an overrun occurs. This also reduces the size of a log ticket down by about 150 bytes... Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 107 +++--------------------------------------- fs/xfs/xfs_log_priv.h | 20 -------- 2 files changed, 6 insertions(+), 121 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 23ca073de6df..4e3fc28c12f5 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -378,30 +378,6 @@ xlog_grant_head_check( return error; } -static void -xlog_tic_reset_res(xlog_ticket_t *tic) -{ - tic->t_res_num = 0; - tic->t_res_arr_sum = 0; - tic->t_res_num_ophdrs = 0; -} - -static void -xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type) -{ - if (tic->t_res_num == XLOG_TIC_LEN_MAX) { - /* add to overflow and start again */ - tic->t_res_o_flow += tic->t_res_arr_sum; - tic->t_res_num = 0; - tic->t_res_arr_sum = 0; - } - - tic->t_res_arr[tic->t_res_num].r_len = len; - tic->t_res_arr[tic->t_res_num].r_type = type; - tic->t_res_arr_sum += len; - tic->t_res_num++; -} - bool xfs_log_writable( struct xfs_mount *mp) @@ -451,8 +427,6 @@ xfs_log_regrant( xlog_grant_push_ail(log, tic->t_unit_res); tic->t_curr_res = tic->t_unit_res; - xlog_tic_reset_res(tic); - if (tic->t_cnt > 0) return 0; @@ -2178,63 +2152,11 @@ xlog_print_tic_res( struct xfs_mount *mp, struct xlog_ticket *ticket) { - uint i; - uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); - - /* match with XLOG_REG_TYPE_* in xfs_log.h */ -#define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str - static char *res_type_str[] = { - REG_TYPE_STR(BFORMAT, "bformat"), - REG_TYPE_STR(BCHUNK, "bchunk"), - REG_TYPE_STR(EFI_FORMAT, "efi_format"), - REG_TYPE_STR(EFD_FORMAT, "efd_format"), - REG_TYPE_STR(IFORMAT, "iformat"), - REG_TYPE_STR(ICORE, "icore"), - REG_TYPE_STR(IEXT, "iext"), - REG_TYPE_STR(IBROOT, "ibroot"), - REG_TYPE_STR(ILOCAL, "ilocal"), - REG_TYPE_STR(IATTR_EXT, "iattr_ext"), - REG_TYPE_STR(IATTR_BROOT, "iattr_broot"), - REG_TYPE_STR(IATTR_LOCAL, "iattr_local"), - REG_TYPE_STR(QFORMAT, "qformat"), - REG_TYPE_STR(DQUOT, "dquot"), - REG_TYPE_STR(QUOTAOFF, "quotaoff"), - REG_TYPE_STR(LRHEADER, "LR header"), - REG_TYPE_STR(UNMOUNT, "unmount"), - REG_TYPE_STR(COMMIT, "commit"), - REG_TYPE_STR(TRANSHDR, "trans header"), - REG_TYPE_STR(ICREATE, "inode create"), - REG_TYPE_STR(RUI_FORMAT, "rui_format"), - REG_TYPE_STR(RUD_FORMAT, "rud_format"), - REG_TYPE_STR(CUI_FORMAT, "cui_format"), - REG_TYPE_STR(CUD_FORMAT, "cud_format"), - REG_TYPE_STR(BUI_FORMAT, "bui_format"), - REG_TYPE_STR(BUD_FORMAT, "bud_format"), - }; - BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1); -#undef REG_TYPE_STR - xfs_warn(mp, "ticket reservation summary:"); - xfs_warn(mp, " unit res = %d bytes", - ticket->t_unit_res); - xfs_warn(mp, " current res = %d bytes", - ticket->t_curr_res); - xfs_warn(mp, " total reg = %u bytes (o/flow = %u bytes)", - ticket->t_res_arr_sum, ticket->t_res_o_flow); - xfs_warn(mp, " ophdrs = %u (ophdr space = %u bytes)", - ticket->t_res_num_ophdrs, ophdr_spc); - xfs_warn(mp, " ophdr + reg = %u bytes", - ticket->t_res_arr_sum + ticket->t_res_o_flow + ophdr_spc); - xfs_warn(mp, " num regions = %u", - ticket->t_res_num); - - for (i = 0; i < ticket->t_res_num; i++) { - uint r_type = ticket->t_res_arr[i].r_type; - xfs_warn(mp, "region[%u]: %s - %u bytes", i, - ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? - "bad-rtype" : res_type_str[r_type]), - ticket->t_res_arr[i].r_len); - } + xfs_warn(mp, " unit res = %d bytes", ticket->t_unit_res); + xfs_warn(mp, " current res = %d bytes", ticket->t_curr_res); + xfs_warn(mp, " original count = %d", ticket->t_ocnt); + xfs_warn(mp, " remaining count = %d", ticket->t_cnt); } /* @@ -2299,7 +2221,6 @@ xlog_write_calc_vec_length( uint optype) { struct xfs_log_vec *lv; - int headers = 0; int len = 0; int i; @@ -2308,17 +2229,9 @@ xlog_write_calc_vec_length( if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) continue; - headers += lv->lv_niovecs; - - for (i = 0; i < lv->lv_niovecs; i++) { - struct xfs_log_iovec *vecp = &lv->lv_iovecp[i]; - - len += vecp->i_len; - xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type); - } + for (i = 0; i < lv->lv_niovecs; i++) + len += lv->lv_iovecp[i].i_len; } - ticket->t_res_num_ophdrs += headers; - return len; } @@ -2377,7 +2290,6 @@ xlog_write_setup_copy( /* account for new log op header */ ticket->t_curr_res -= sizeof(struct xlog_op_header); - ticket->t_res_num_ophdrs++; return sizeof(struct xlog_op_header); } @@ -3025,9 +2937,6 @@ restart: */ if (log_offset == 0) { ticket->t_curr_res -= log->l_iclog_hsize; - xlog_tic_add_region(ticket, - log->l_iclog_hsize, - XLOG_REG_TYPE_LRHEADER); head->h_cycle = cpu_to_be32(log->l_curr_cycle); head->h_lsn = cpu_to_be64( xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block)); @@ -3107,7 +3016,6 @@ xfs_log_ticket_regrant( xlog_grant_sub_space(log, &log->l_write_head.grant, ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; - xlog_tic_reset_res(ticket); trace_xfs_log_ticket_regrant_sub(log, ticket); @@ -3118,7 +3026,6 @@ xfs_log_ticket_regrant( trace_xfs_log_ticket_regrant_exit(log, ticket); ticket->t_curr_res = ticket->t_unit_res; - xlog_tic_reset_res(ticket); } xfs_log_ticket_put(ticket); @@ -3628,8 +3535,6 @@ xlog_ticket_alloc( if (permanent) tic->t_flags |= XLOG_TIC_PERM_RESERV; - xlog_tic_reset_res(tic); - return tic; } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 6f247561588b..b588bb3d02cf 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -142,19 +142,6 @@ enum xlog_iclog_state { #define XLOG_COVER_OPS 5 -/* Ticket reservation region accounting */ -#define XLOG_TIC_LEN_MAX 15 - -/* - * Reservation region - * As would be stored in xfs_log_iovec but without the i_addr which - * we don't care about. - */ -typedef struct xlog_res { - uint r_len; /* region length :4 */ - uint r_type; /* region's transaction type :4 */ -} xlog_res_t; - typedef struct xlog_ticket { struct list_head t_queue; /* reserve/write queue */ struct task_struct *t_task; /* task that owns this ticket */ @@ -165,13 +152,6 @@ typedef struct xlog_ticket { char t_ocnt; /* original count : 1 */ char t_cnt; /* current count : 1 */ char t_flags; /* properties of reservation : 1 */ - - /* reservation array fields */ - uint t_res_num; /* num in array : 4 */ - uint t_res_num_ophdrs; /* num op hdrs : 4 */ - uint t_res_arr_sum; /* array sum : 4 */ - uint t_res_o_flow; /* sum overflow : 4 */ - xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : 8 * 15 */ } xlog_ticket_t; /* From d80fc2914f9125a723d9af7038b1592fa8d1ea96 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:35:19 +1000 Subject: [PATCH 09/16] xfs: pass lv chain length into xlog_write() The caller of xlog_write() usually has a close accounting of the aggregated vector length contained in the log vector chain passed to xlog_write(). There is no need to iterate the chain to calculate he length of the data in xlog_write_calculate_len() if the caller is already iterating that chain to build it. Passing in the vector length avoids doing an extra chain iteration, which can be a significant amount of work given that large CIL commits can have hundreds of thousands of vectors attached to the chain. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 35 +++++------------------------------ fs/xfs/xfs_log_cil.c | 25 +++++++++++++++++-------- fs/xfs/xfs_log_priv.h | 2 +- 3 files changed, 23 insertions(+), 39 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4e3fc28c12f5..e03a9419f5cf 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -958,7 +958,8 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(unmount_rec); - return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS); + return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS, + reg.i_len); } /* @@ -2209,32 +2210,6 @@ xlog_print_trans( } } -/* - * Calculate the potential space needed by the log vector. All regions contain - * their own opheaders and they are accounted for in region space so we don't - * need to add them to the vector length here. - */ -static int -xlog_write_calc_vec_length( - struct xlog_ticket *ticket, - struct xfs_log_vec *log_vector, - uint optype) -{ - struct xfs_log_vec *lv; - int len = 0; - int i; - - for (lv = log_vector; lv; lv = lv->lv_next) { - /* we don't write ordered log vectors */ - if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) - continue; - - for (i = 0; i < lv->lv_niovecs; i++) - len += lv->lv_iovecp[i].i_len; - } - return len; -} - static xlog_op_header_t * xlog_write_setup_ophdr( struct xlog_op_header *ophdr, @@ -2388,13 +2363,14 @@ xlog_write( struct xfs_cil_ctx *ctx, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, - uint optype) + uint optype, + uint32_t len) + { struct xlog_in_core *iclog = NULL; struct xfs_log_vec *lv = log_vector; struct xfs_log_iovec *vecp = lv->lv_iovecp; int index = 0; - int len; int partial_copy = 0; int partial_copy_len = 0; int contwr = 0; @@ -2409,7 +2385,6 @@ xlog_write( xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); } - len = xlog_write_calc_vec_length(ticket, log_vector, optype); while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { void *ptr; int log_offset; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5ccbb6bd4655..dad2b527c3ff 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -825,7 +825,8 @@ restart: static int xlog_cil_write_chain( struct xfs_cil_ctx *ctx, - struct xfs_log_vec *chain) + struct xfs_log_vec *chain, + uint32_t chain_len) { struct xlog *log = ctx->cil->xc_log; int error; @@ -833,7 +834,8 @@ xlog_cil_write_chain( error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD); if (error) return error; - return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS); + return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS, + chain_len); } /* @@ -872,7 +874,8 @@ xlog_cil_write_commit_record( /* account for space used by record data */ ctx->ticket->t_curr_res -= reg.i_len; - error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS); + error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS, + reg.i_len); if (error) xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); return error; @@ -935,11 +938,12 @@ xlog_cil_build_trans_hdr( sizeof(struct xfs_trans_header); hdr->lhdr[1].i_type = XLOG_REG_TYPE_TRANSHDR; - tic->t_curr_res -= hdr->lhdr[0].i_len + hdr->lhdr[1].i_len; - lvhdr->lv_niovecs = 2; lvhdr->lv_iovecp = &hdr->lhdr[0]; + lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len; lvhdr->lv_next = ctx->lv_chain; + + tic->t_curr_res -= lvhdr->lv_bytes; } /* @@ -966,7 +970,8 @@ xlog_cil_push_work( struct xlog *log = cil->xc_log; struct xfs_log_vec *lv; struct xfs_cil_ctx *new_ctx; - int num_iovecs; + int num_iovecs = 0; + int num_bytes = 0; int error = 0; struct xlog_cil_trans_hdr thdr; struct xfs_log_vec lvhdr = { NULL }; @@ -1047,7 +1052,6 @@ xlog_cil_push_work( * by the flush lock. */ lv = NULL; - num_iovecs = 0; while (!list_empty(&cil->xc_cil)) { struct xfs_log_item *item; @@ -1061,6 +1065,10 @@ xlog_cil_push_work( lv = item->li_lv; item->li_lv = NULL; num_iovecs += lv->lv_niovecs; + + /* we don't write ordered log vectors */ + if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) + num_bytes += lv->lv_bytes; } /* @@ -1099,8 +1107,9 @@ xlog_cil_push_work( * transaction header here as it is not accounted for in xlog_write(). */ xlog_cil_build_trans_hdr(ctx, &thdr, &lvhdr, num_iovecs); + num_bytes += lvhdr.lv_bytes; - error = xlog_cil_write_chain(ctx, &lvhdr); + error = xlog_cil_write_chain(ctx, &lvhdr, num_bytes); if (error) goto out_abort_free_ticket; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b588bb3d02cf..a070a2c827ce 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -503,7 +503,7 @@ void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, struct xfs_log_vec *log_vector, struct xlog_ticket *tic, - uint optype); + uint optype, uint32_t len); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); From decb545fc081a1f03d4216831afb82482e8b6342 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Apr 2022 10:35:53 +1000 Subject: [PATCH 10/16] xfs: change the type of ic_datap Turn ic_datap from a char into a void pointer given that it points to arbitrary data. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Chandan Babu R Reviewed-by: Darrick J. Wong [dgc: also remove (char *) cast in xlog_alloc_log()] Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 7 +++---- fs/xfs/xfs_log_priv.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e03a9419f5cf..4cac816023a4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1644,7 +1644,7 @@ xlog_alloc_log( iclog->ic_log = log; atomic_set(&iclog->ic_refcnt, 0); INIT_LIST_HEAD(&iclog->ic_callbacks); - iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; + iclog->ic_datap = (void *)iclog->ic_data + log->l_iclog_hsize; init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); @@ -3664,7 +3664,7 @@ xlog_verify_iclog( if (field_offset & 0x1ff) { clientid = ophead->oh_clientid; } else { - idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); + idx = BTOBBT((void *)&ophead->oh_clientid - iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -3687,8 +3687,7 @@ xlog_verify_iclog( if (field_offset & 0x1ff) { op_len = be32_to_cpu(ophead->oh_len); } else { - idx = BTOBBT((uintptr_t)&ophead->oh_len - - (uintptr_t)iclog->ic_datap); + idx = BTOBBT((void *)&ophead->oh_len - iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index a070a2c827ce..20d818c14003 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -190,7 +190,7 @@ typedef struct xlog_in_core { u32 ic_offset; enum xlog_iclog_state ic_state; unsigned int ic_flags; - char *ic_datap; /* pointer to iclog data */ + void *ic_datap; /* pointer to iclog data */ struct list_head ic_callbacks; /* reference counts need their own cacheline */ From db357078b0423e0a86a633780cbca3f01c54885d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:36:05 +1000 Subject: [PATCH 11/16] xfs: introduce xlog_write_full() Introduce an optimised version of xlog_write() that is used when the entire write will fit in a single iclog. This greatly simplifies the implementation of writing a log vector chain into an iclog, and sets the ground work for a much more understandable xlog_write() implementation. This incorporates some factoring and simplifications proposed by Christoph Hellwig. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 69 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4cac816023a4..d1f4d9031df3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2210,6 +2210,58 @@ xlog_print_trans( } } +static inline void +xlog_write_iovec( + struct xlog_in_core *iclog, + uint32_t *log_offset, + void *data, + uint32_t write_len, + int *bytes_left, + uint32_t *record_cnt, + uint32_t *data_cnt) +{ + ASSERT(*log_offset % sizeof(int32_t) == 0); + ASSERT(write_len % sizeof(int32_t) == 0); + + memcpy(iclog->ic_datap + *log_offset, data, write_len); + *log_offset += write_len; + *bytes_left -= write_len; + (*record_cnt)++; + *data_cnt += write_len; +} + +/* + * Write log vectors into a single iclog which is guaranteed by the caller + * to have enough space to write the entire log vector into. + */ +static void +xlog_write_full( + struct xfs_log_vec *lv, + struct xlog_ticket *ticket, + struct xlog_in_core *iclog, + uint32_t *log_offset, + uint32_t *len, + uint32_t *record_cnt, + uint32_t *data_cnt) +{ + int index; + + ASSERT(*log_offset + *len <= iclog->ic_size); + + /* + * Ordered log vectors have no regions to write so this + * loop will naturally skip them. + */ + for (index = 0; index < lv->lv_niovecs; index++) { + struct xfs_log_iovec *reg = &lv->lv_iovecp[index]; + struct xlog_op_header *ophdr = reg->i_addr; + + ophdr->oh_tid = cpu_to_be32(ticket->t_tid); + xlog_write_iovec(iclog, log_offset, reg->i_addr, + reg->i_len, len, record_cnt, data_cnt); + } +} + static xlog_op_header_t * xlog_write_setup_ophdr( struct xlog_op_header *ophdr, @@ -2374,8 +2426,8 @@ xlog_write( int partial_copy = 0; int partial_copy_len = 0; int contwr = 0; - int record_cnt = 0; - int data_cnt = 0; + uint32_t record_cnt = 0; + uint32_t data_cnt = 0; int error = 0; if (ticket->t_curr_res < 0) { @@ -2395,7 +2447,6 @@ xlog_write( return error; ASSERT(log_offset <= iclog->ic_size - 1); - ptr = iclog->ic_datap + log_offset; /* * If we have a context pointer, pass it the first iclog we are @@ -2407,10 +2458,22 @@ xlog_write( ctx = NULL; } + /* If this is a single iclog write, go fast... */ + if (!contwr && lv == log_vector) { + while (lv) { + xlog_write_full(lv, ticket, iclog, &log_offset, + &len, &record_cnt, &data_cnt); + lv = lv->lv_next; + } + data_cnt = 0; + break; + } + /* * This loop writes out as many regions as can fit in the amount * of space which was allocated by xlog_state_get_iclog_space(). */ + ptr = iclog->ic_datap + log_offset; while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { struct xfs_log_iovec *reg; struct xlog_op_header *ophdr; From ad3e3693182bb990484b187b33c7f9735bb549be Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:36:15 +1000 Subject: [PATCH 12/16] xfs: introduce xlog_write_partial() Re-implement writing of a log vector that does not fit into the current iclog. The iclog will already be in XLOG_STATE_WANT_SYNC because xlog_get_iclog_space() will have reserved all the remaining iclog space for us, hence we can simply iterate over the iovecs in the log vector getting more iclog space until the entire log vector is written. Handling this partial write case separately means we do need to pass unnecessary state around for the common, fast path case when the log vector fits entirely within the current iclog. It isolates the complexity and allows us to modify and improve the partial write case without impacting the simple fast path. This change includes several improvements incorporated from patches written by Christoph Hellwig. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 432 +++++++++++++++++++----------------------- fs/xfs/xfs_log_priv.h | 8 - 2 files changed, 200 insertions(+), 240 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index d1f4d9031df3..29e09a888f38 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2246,7 +2246,8 @@ xlog_write_full( { int index; - ASSERT(*log_offset + *len <= iclog->ic_size); + ASSERT(*log_offset + *len <= iclog->ic_size || + iclog->ic_state == XLOG_STATE_WANT_SYNC); /* * Ordered log vectors have no regions to write so this @@ -2262,111 +2263,177 @@ xlog_write_full( } } -static xlog_op_header_t * -xlog_write_setup_ophdr( - struct xlog_op_header *ophdr, - struct xlog_ticket *ticket) +static int +xlog_write_get_more_iclog_space( + struct xlog_ticket *ticket, + struct xlog_in_core **iclogp, + uint32_t *log_offset, + uint32_t len, + uint32_t *record_cnt, + uint32_t *data_cnt, + int *contwr) { - ophdr->oh_clientid = XFS_TRANSACTION; - ophdr->oh_res2 = 0; - ophdr->oh_flags = 0; - return ophdr; + struct xlog_in_core *iclog = *iclogp; + struct xlog *log = iclog->ic_log; + int error; + + spin_lock(&log->l_icloglock); + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC); + xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); + error = xlog_state_release_iclog(log, iclog); + spin_unlock(&log->l_icloglock); + if (error) + return error; + + error = xlog_state_get_iclog_space(log, len, &iclog, + ticket, contwr, log_offset); + if (error) + return error; + *record_cnt = 0; + *data_cnt = 0; + *iclogp = iclog; + return 0; } /* - * Set up the parameters of the region copy into the log. This has - * to handle region write split across multiple log buffers - this - * state is kept external to this function so that this code can - * be written in an obvious, self documenting manner. + * Write log vectors into a single iclog which is smaller than the current chain + * length. We write until we cannot fit a full record into the remaining space + * and then stop. We return the log vector that is to be written that cannot + * wholly fit in the iclog. */ static int -xlog_write_setup_copy( +xlog_write_partial( + struct xfs_log_vec *lv, struct xlog_ticket *ticket, - struct xlog_op_header *ophdr, - int space_available, - int space_required, - int *copy_off, - int *copy_len, - int *last_was_partial_copy, - int *bytes_consumed) -{ - int still_to_copy; - - still_to_copy = space_required - *bytes_consumed; - *copy_off = *bytes_consumed; - - if (still_to_copy <= space_available) { - /* write of region completes here */ - *copy_len = still_to_copy; - ophdr->oh_len = cpu_to_be32(*copy_len); - if (*last_was_partial_copy) - ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS); - *last_was_partial_copy = 0; - *bytes_consumed = 0; - return 0; - } - - /* partial write of region, needs extra log op header reservation */ - *copy_len = space_available; - ophdr->oh_len = cpu_to_be32(*copy_len); - ophdr->oh_flags |= XLOG_CONTINUE_TRANS; - if (*last_was_partial_copy) - ophdr->oh_flags |= XLOG_WAS_CONT_TRANS; - *bytes_consumed += *copy_len; - (*last_was_partial_copy)++; - - /* account for new log op header */ - ticket->t_curr_res -= sizeof(struct xlog_op_header); - - return sizeof(struct xlog_op_header); -} - -static int -xlog_write_copy_finish( - struct xlog *log, - struct xlog_in_core *iclog, - uint flags, - int *record_cnt, - int *data_cnt, - int *partial_copy, - int *partial_copy_len, - int log_offset) + struct xlog_in_core **iclogp, + uint32_t *log_offset, + uint32_t *len, + uint32_t *record_cnt, + uint32_t *data_cnt, + int *contwr) { + struct xlog_in_core *iclog = *iclogp; + struct xlog *log = iclog->ic_log; + struct xlog_op_header *ophdr; + int index = 0; + uint32_t rlen; int error; - if (*partial_copy) { + /* walk the logvec, copying until we run out of space in the iclog */ + for (index = 0; index < lv->lv_niovecs; index++) { + struct xfs_log_iovec *reg = &lv->lv_iovecp[index]; + uint32_t reg_offset = 0; + /* - * This iclog has already been marked WANT_SYNC by - * xlog_state_get_iclog_space. + * The first region of a continuation must have a non-zero + * length otherwise log recovery will just skip over it and + * start recovering from the next opheader it finds. Because we + * mark the next opheader as a continuation, recovery will then + * incorrectly add the continuation to the previous region and + * that breaks stuff. + * + * Hence if there isn't space for region data after the + * opheader, then we need to start afresh with a new iclog. */ - spin_lock(&log->l_icloglock); - xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); - *record_cnt = 0; - *data_cnt = 0; - goto release_iclog; + if (iclog->ic_size - *log_offset <= + sizeof(struct xlog_op_header)) { + error = xlog_write_get_more_iclog_space(ticket, + &iclog, log_offset, *len, record_cnt, + data_cnt, contwr); + if (error) + return error; + } + + ophdr = reg->i_addr; + rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset); + + ophdr->oh_tid = cpu_to_be32(ticket->t_tid); + ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header)); + if (rlen != reg->i_len) + ophdr->oh_flags |= XLOG_CONTINUE_TRANS; + + xlog_verify_dest_ptr(log, iclog->ic_datap + *log_offset); + xlog_write_iovec(iclog, log_offset, reg->i_addr, + rlen, len, record_cnt, data_cnt); + + /* If we wrote the whole region, move to the next. */ + if (rlen == reg->i_len) + continue; + + /* + * We now have a partially written iovec, but it can span + * multiple iclogs so we loop here. First we release the iclog + * we currently have, then we get a new iclog and add a new + * opheader. Then we continue copying from where we were until + * we either complete the iovec or fill the iclog. If we + * complete the iovec, then we increment the index and go right + * back to the top of the outer loop. if we fill the iclog, we + * run the inner loop again. + * + * This is complicated by the tail of a region using all the + * space in an iclog and hence requiring us to release the iclog + * and get a new one before returning to the outer loop. We must + * always guarantee that we exit this inner loop with at least + * space for log transaction opheaders left in the current + * iclog, hence we cannot just terminate the loop at the end + * of the of the continuation. So we loop while there is no + * space left in the current iclog, and check for the end of the + * continuation after getting a new iclog. + */ + do { + /* + * Ensure we include the continuation opheader in the + * space we need in the new iclog by adding that size + * to the length we require. This continuation opheader + * needs to be accounted to the ticket as the space it + * consumes hasn't been accounted to the lv we are + * writing. + */ + error = xlog_write_get_more_iclog_space(ticket, + &iclog, log_offset, + *len + sizeof(struct xlog_op_header), + record_cnt, data_cnt, contwr); + if (error) + return error; + + ophdr = iclog->ic_datap + *log_offset; + ophdr->oh_tid = cpu_to_be32(ticket->t_tid); + ophdr->oh_clientid = XFS_TRANSACTION; + ophdr->oh_res2 = 0; + ophdr->oh_flags = XLOG_WAS_CONT_TRANS; + + ticket->t_curr_res -= sizeof(struct xlog_op_header); + *log_offset += sizeof(struct xlog_op_header); + *data_cnt += sizeof(struct xlog_op_header); + + /* + * If rlen fits in the iclog, then end the region + * continuation. Otherwise we're going around again. + */ + reg_offset += rlen; + rlen = reg->i_len - reg_offset; + if (rlen <= iclog->ic_size - *log_offset) + ophdr->oh_flags |= XLOG_END_TRANS; + else + ophdr->oh_flags |= XLOG_CONTINUE_TRANS; + + rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset); + ophdr->oh_len = cpu_to_be32(rlen); + + xlog_verify_dest_ptr(log, iclog->ic_datap + *log_offset); + xlog_write_iovec(iclog, log_offset, + reg->i_addr + reg_offset, + rlen, len, record_cnt, data_cnt); + + } while (ophdr->oh_flags & XLOG_CONTINUE_TRANS); } - *partial_copy = 0; - *partial_copy_len = 0; - - if (iclog->ic_size - log_offset > sizeof(xlog_op_header_t)) - return 0; - - /* no more space in this iclog - push it. */ - spin_lock(&log->l_icloglock); - xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); - *record_cnt = 0; - *data_cnt = 0; - - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, iclog, 0); - else - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - xlog_is_shutdown(log)); -release_iclog: - error = xlog_state_release_iclog(log, iclog); - spin_unlock(&log->l_icloglock); - return error; + /* + * No more iovecs remain in this logvec so return the next log vec to + * the caller so it can go back to fast path copying. + */ + *iclogp = iclog; + return 0; } /* @@ -2421,14 +2488,11 @@ xlog_write( { struct xlog_in_core *iclog = NULL; struct xfs_log_vec *lv = log_vector; - struct xfs_log_iovec *vecp = lv->lv_iovecp; - int index = 0; - int partial_copy = 0; - int partial_copy_len = 0; int contwr = 0; uint32_t record_cnt = 0; uint32_t data_cnt = 0; int error = 0; + int log_offset; if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, @@ -2437,151 +2501,54 @@ xlog_write( xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); } - while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { - void *ptr; - int log_offset; + error = xlog_state_get_iclog_space(log, len, &iclog, ticket, + &contwr, &log_offset); + if (error) + return error; - error = xlog_state_get_iclog_space(log, len, &iclog, ticket, - &contwr, &log_offset); - if (error) - return error; + ASSERT(log_offset <= iclog->ic_size - 1); - ASSERT(log_offset <= iclog->ic_size - 1); + /* + * If we have a context pointer, pass it the first iclog we are + * writing to so it can record state needed for iclog write + * ordering. + */ + if (ctx) + xlog_cil_set_ctx_write_state(ctx, iclog); + while (lv) { /* - * If we have a context pointer, pass it the first iclog we are - * writing to so it can record state needed for iclog write - * ordering. + * If the entire log vec does not fit in the iclog, punt it to + * the partial copy loop which can handle this case. */ - if (ctx) { - xlog_cil_set_ctx_write_state(ctx, iclog); - ctx = NULL; - } - - /* If this is a single iclog write, go fast... */ - if (!contwr && lv == log_vector) { - while (lv) { - xlog_write_full(lv, ticket, iclog, &log_offset, - &len, &record_cnt, &data_cnt); - lv = lv->lv_next; - } - data_cnt = 0; - break; - } - - /* - * This loop writes out as many regions as can fit in the amount - * of space which was allocated by xlog_state_get_iclog_space(). - */ - ptr = iclog->ic_datap + log_offset; - while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { - struct xfs_log_iovec *reg; - struct xlog_op_header *ophdr; - int copy_len; - int copy_off; - bool ordered = false; - bool added_ophdr = false; - - /* ordered log vectors have no regions to write */ - if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { - ASSERT(lv->lv_niovecs == 0); - ordered = true; - goto next_lv; - } - - reg = &vecp[index]; - ASSERT(reg->i_len % sizeof(int32_t) == 0); - ASSERT((unsigned long)ptr % sizeof(int32_t) == 0); - - /* - * Regions always have their ophdr at the start of the - * region, except for: - * - a transaction start which has a start record ophdr - * before the first region ophdr; and - * - the previous region didn't fully fit into an iclog - * so needs a continuation ophdr to prepend the region - * in this new iclog. - */ - ophdr = reg->i_addr; - if (optype && index) { - optype &= ~XLOG_START_TRANS; - } else if (partial_copy) { - ophdr = xlog_write_setup_ophdr(ptr, ticket); - xlog_write_adv_cnt(&ptr, &len, &log_offset, - sizeof(struct xlog_op_header)); - added_ophdr = true; - } - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); - - len += xlog_write_setup_copy(ticket, ophdr, - iclog->ic_size-log_offset, - reg->i_len, - ©_off, ©_len, - &partial_copy, - &partial_copy_len); - xlog_verify_dest_ptr(log, ptr); - - - /* - * Wart: need to update length in embedded ophdr not - * to include it's own length. - */ - if (!added_ophdr) { - ophdr->oh_len = cpu_to_be32(copy_len - - sizeof(struct xlog_op_header)); - } - - ASSERT(copy_len > 0); - memcpy(ptr, reg->i_addr + copy_off, copy_len); - xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); - - if (added_ophdr) - copy_len += sizeof(struct xlog_op_header); - record_cnt++; - data_cnt += contwr ? copy_len : 0; - - error = xlog_write_copy_finish(log, iclog, optype, - &record_cnt, &data_cnt, - &partial_copy, - &partial_copy_len, - log_offset); - if (error) + if (lv->lv_niovecs && + lv->lv_bytes > iclog->ic_size - log_offset) { + error = xlog_write_partial(lv, ticket, &iclog, + &log_offset, &len, &record_cnt, + &data_cnt, &contwr); + if (error) { + /* + * We have no iclog to release, so just return + * the error immediately. + */ return error; - - /* - * if we had a partial copy, we need to get more iclog - * space but we don't want to increment the region - * index because there is still more is this region to - * write. - * - * If we completed writing this region, and we flushed - * the iclog (indicated by resetting of the record - * count), then we also need to get more log space. If - * this was the last record, though, we are done and - * can just return. - */ - if (partial_copy) - break; - - if (++index == lv->lv_niovecs) { -next_lv: - lv = lv->lv_next; - index = 0; - if (lv) - vecp = lv->lv_iovecp; - } - if (record_cnt == 0 && !ordered) { - if (!lv) - return 0; - break; } + } else { + xlog_write_full(lv, ticket, iclog, &log_offset, + &len, &record_cnt, &data_cnt); } + lv = lv->lv_next; } - ASSERT(len == 0); + /* + * We've already been guaranteed that the last writes will fit inside + * the current iclog, and hence it will already have the space used by + * those writes accounted to it. Hence we do not need to update the + * iclog with the number of bytes written here. + */ spin_lock(&log->l_icloglock); - xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); + xlog_state_finish_copy(log, iclog, record_cnt, 0); error = xlog_state_release_iclog(log, iclog); spin_unlock(&log->l_icloglock); @@ -3738,11 +3705,12 @@ xlog_verify_iclog( iclog->ic_header.h_cycle_data[idx]); } } - if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) + if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) { xfs_warn(log->l_mp, - "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx", - __func__, clientid, ophead, + "%s: op %d invalid clientid %d op "PTR_FMT" offset 0x%lx", + __func__, i, clientid, ophead, (unsigned long)field_offset); + } /* check length */ p = &ophead->oh_len; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 20d818c14003..626e293ab2a4 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -491,14 +491,6 @@ extern struct kmem_cache *xfs_log_ticket_cache; struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes, int count, bool permanent); -static inline void -xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) -{ - *ptr += bytes; - *len -= bytes; - *off += bytes; -} - void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, From 1236bbe86bb83165ff6ba68ce19d81713340f597 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Apr 2022 10:36:27 +1000 Subject: [PATCH 13/16] xfs: remove xlog_verify_dest_ptr Just check that the offset in xlog_write_vec is smaller than the iclog size and remove the expensive cycling through all iclogs. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Chandan Babu R Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 35 +---------------------------------- fs/xfs/xfs_log_priv.h | 4 ---- 2 files changed, 1 insertion(+), 38 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 29e09a888f38..106331ee454e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -61,10 +61,6 @@ xlog_sync( struct xlog_in_core *iclog); #if defined(DEBUG) STATIC void -xlog_verify_dest_ptr( - struct xlog *log, - void *ptr); -STATIC void xlog_verify_grant_tail( struct xlog *log); STATIC void @@ -77,7 +73,6 @@ xlog_verify_tail_lsn( struct xlog *log, struct xlog_in_core *iclog); #else -#define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c) #define xlog_verify_tail_lsn(a,b) @@ -1626,9 +1621,6 @@ xlog_alloc_log( GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!iclog->ic_data) goto out_free_iclog; -#ifdef DEBUG - log->l_iclog_bak[i] = &iclog->ic_header; -#endif head = &iclog->ic_header; memset(head, 0, sizeof(xlog_rec_header_t)); head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); @@ -2220,6 +2212,7 @@ xlog_write_iovec( uint32_t *record_cnt, uint32_t *data_cnt) { + ASSERT(*log_offset < iclog->ic_log->l_iclog_size); ASSERT(*log_offset % sizeof(int32_t) == 0); ASSERT(write_len % sizeof(int32_t) == 0); @@ -2313,7 +2306,6 @@ xlog_write_partial( int *contwr) { struct xlog_in_core *iclog = *iclogp; - struct xlog *log = iclog->ic_log; struct xlog_op_header *ophdr; int index = 0; uint32_t rlen; @@ -2352,7 +2344,6 @@ xlog_write_partial( if (rlen != reg->i_len) ophdr->oh_flags |= XLOG_CONTINUE_TRANS; - xlog_verify_dest_ptr(log, iclog->ic_datap + *log_offset); xlog_write_iovec(iclog, log_offset, reg->i_addr, rlen, len, record_cnt, data_cnt); @@ -2420,7 +2411,6 @@ xlog_write_partial( rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset); ophdr->oh_len = cpu_to_be32(rlen); - xlog_verify_dest_ptr(log, iclog->ic_datap + *log_offset); xlog_write_iovec(iclog, log_offset, reg->i_addr + reg_offset, rlen, len, record_cnt, data_cnt); @@ -3544,29 +3534,6 @@ xlog_ticket_alloc( } #if defined(DEBUG) -/* - * Make sure that the destination ptr is within the valid data region of - * one of the iclogs. This uses backup pointers stored in a different - * part of the log in case we trash the log structure. - */ -STATIC void -xlog_verify_dest_ptr( - struct xlog *log, - void *ptr) -{ - int i; - int good_ptr = 0; - - for (i = 0; i < log->l_iclog_bufs; i++) { - if (ptr >= log->l_iclog_bak[i] && - ptr <= log->l_iclog_bak[i] + log->l_iclog_size) - good_ptr++; - } - - if (!good_ptr) - xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); -} - /* * Check to make sure the grant write head didn't just over lap the tail. If * the cycles are the same, we can't be overlapping. Otherwise, make sure that diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 626e293ab2a4..2fbec7a30e59 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -420,10 +420,6 @@ struct xlog { struct xfs_kobj l_kobj; - /* The following field are used for debugging; need to hold icloglock */ -#ifdef DEBUG - void *l_iclog_bak[XLOG_MAX_ICLOGS]; -#endif /* log recovery lsn tracking (for buffer submission */ xfs_lsn_t l_recovery_lsn; From be8ddda5f7e01229729a3e00e9971cc2b8a9ec10 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:36:37 +1000 Subject: [PATCH 14/16] xfs: xlog_write() no longer needs contwr state The rework of xlog_write() no longer requires xlog_get_iclog_state() to tell it about internal iclog space reservation state to direct it on what to do. Remove this parameter. $ size fs/xfs/xfs_log.o.* text data bss dec hex filename 26520 560 8 27088 69d0 fs/xfs/xfs_log.o.orig 26384 560 8 26952 6948 fs/xfs/xfs_log.o.patched Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 106331ee454e..13273a906f1b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -49,7 +49,6 @@ xlog_state_get_iclog_space( int len, struct xlog_in_core **iclog, struct xlog_ticket *ticket, - int *continued_write, int *logoffsetp); STATIC void xlog_grant_push_ail( @@ -2263,8 +2262,7 @@ xlog_write_get_more_iclog_space( uint32_t *log_offset, uint32_t len, uint32_t *record_cnt, - uint32_t *data_cnt, - int *contwr) + uint32_t *data_cnt) { struct xlog_in_core *iclog = *iclogp; struct xlog *log = iclog->ic_log; @@ -2278,8 +2276,8 @@ xlog_write_get_more_iclog_space( if (error) return error; - error = xlog_state_get_iclog_space(log, len, &iclog, - ticket, contwr, log_offset); + error = xlog_state_get_iclog_space(log, len, &iclog, ticket, + log_offset); if (error) return error; *record_cnt = 0; @@ -2302,8 +2300,7 @@ xlog_write_partial( uint32_t *log_offset, uint32_t *len, uint32_t *record_cnt, - uint32_t *data_cnt, - int *contwr) + uint32_t *data_cnt) { struct xlog_in_core *iclog = *iclogp; struct xlog_op_header *ophdr; @@ -2331,7 +2328,7 @@ xlog_write_partial( sizeof(struct xlog_op_header)) { error = xlog_write_get_more_iclog_space(ticket, &iclog, log_offset, *len, record_cnt, - data_cnt, contwr); + data_cnt); if (error) return error; } @@ -2383,7 +2380,7 @@ xlog_write_partial( error = xlog_write_get_more_iclog_space(ticket, &iclog, log_offset, *len + sizeof(struct xlog_op_header), - record_cnt, data_cnt, contwr); + record_cnt, data_cnt); if (error) return error; @@ -2478,7 +2475,6 @@ xlog_write( { struct xlog_in_core *iclog = NULL; struct xfs_log_vec *lv = log_vector; - int contwr = 0; uint32_t record_cnt = 0; uint32_t data_cnt = 0; int error = 0; @@ -2492,7 +2488,7 @@ xlog_write( } error = xlog_state_get_iclog_space(log, len, &iclog, ticket, - &contwr, &log_offset); + &log_offset); if (error) return error; @@ -2515,7 +2511,7 @@ xlog_write( lv->lv_bytes > iclog->ic_size - log_offset) { error = xlog_write_partial(lv, ticket, &iclog, &log_offset, &len, &record_cnt, - &data_cnt, &contwr); + &data_cnt); if (error) { /* * We have no iclog to release, so just return @@ -2895,7 +2891,6 @@ xlog_state_get_iclog_space( int len, struct xlog_in_core **iclogp, struct xlog_ticket *ticket, - int *continued_write, int *logoffsetp) { int log_offset; @@ -2973,13 +2968,10 @@ restart: * iclogs (to mark it taken), this particular iclog will release/sync * to disk in xlog_write(). */ - if (len <= iclog->ic_size - iclog->ic_offset) { - *continued_write = 0; + if (len <= iclog->ic_size - iclog->ic_offset) iclog->ic_offset += len; - } else { - *continued_write = 1; + else xlog_state_switch_iclogs(log, iclog, iclog->ic_size); - } *iclogp = iclog; ASSERT(iclog->ic_offset <= iclog->ic_size); From 14b07ecd5cd2545fcee3ff29119405f7cf2f59ad Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:36:48 +1000 Subject: [PATCH 15/16] xfs: xlog_write() doesn't need optype anymore So remove it from the interface and callers. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 4 +--- fs/xfs/xfs_log_cil.c | 6 ++---- fs/xfs/xfs_log_priv.h | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 13273a906f1b..c3fb2b0b40d3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -952,8 +952,7 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(unmount_rec); - return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS, - reg.i_len); + return xlog_write(log, NULL, &vec, ticket, reg.i_len); } /* @@ -2469,7 +2468,6 @@ xlog_write( struct xfs_cil_ctx *ctx, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, - uint optype, uint32_t len) { diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index dad2b527c3ff..9065250227f9 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -834,8 +834,7 @@ xlog_cil_write_chain( error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD); if (error) return error; - return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS, - chain_len); + return xlog_write(log, ctx, chain, ctx->ticket, chain_len); } /* @@ -874,8 +873,7 @@ xlog_cil_write_commit_record( /* account for space used by record data */ ctx->ticket->t_curr_res -= reg.i_len; - error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS, - reg.i_len); + error = xlog_write(log, ctx, &vec, ctx->ticket, reg.i_len); if (error) xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); return error; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 2fbec7a30e59..b3d5ee85dd7e 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -491,7 +491,7 @@ void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, struct xfs_log_vec *log_vector, struct xlog_ticket *tic, - uint optype, uint32_t len); + uint32_t len); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); From 593e34391faafd72102bd79c43994f32e9dd0c91 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 10:36:56 +1000 Subject: [PATCH 16/16] xfs: CIL context doesn't need to count iovecs Now that we account for log opheaders in the log item formatting code, we don't actually use the aggregated count of log iovecs in the CIL for anything. Remove it and the tracking code that calculates it. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_cil.c | 22 ++++++---------------- fs/xfs/xfs_log_priv.h | 1 - 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 9065250227f9..e5ab62f08c19 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -284,22 +284,18 @@ xlog_cil_alloc_shadow_bufs( /* * Prepare the log item for insertion into the CIL. Calculate the difference in - * log space and vectors it will consume, and if it is a new item pin it as - * well. + * log space it will consume, and if it is a new item pin it as well. */ STATIC void xfs_cil_prepare_item( struct xlog *log, struct xfs_log_vec *lv, struct xfs_log_vec *old_lv, - int *diff_len, - int *diff_iovecs) + int *diff_len) { /* Account for the new LV being passed in */ - if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { + if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) *diff_len += lv->lv_bytes; - *diff_iovecs += lv->lv_niovecs; - } /* * If there is no old LV, this is the first time we've seen the item in @@ -316,7 +312,6 @@ xfs_cil_prepare_item( ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); *diff_len -= old_lv->lv_bytes; - *diff_iovecs -= old_lv->lv_niovecs; lv->lv_item->li_lv_shadow = old_lv; } @@ -365,12 +360,10 @@ static void xlog_cil_insert_format_items( struct xlog *log, struct xfs_trans *tp, - int *diff_len, - int *diff_iovecs) + int *diff_len) { struct xfs_log_item *lip; - /* Bail out if we didn't find a log item. */ if (list_empty(&tp->t_items)) { ASSERT(0); @@ -413,7 +406,6 @@ xlog_cil_insert_format_items( * set the item up as though it is a new insertion so * that the space reservation accounting is correct. */ - *diff_iovecs -= lv->lv_niovecs; *diff_len -= lv->lv_bytes; /* Ensure the lv is set up according to ->iop_size */ @@ -438,7 +430,7 @@ xlog_cil_insert_format_items( ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); lip->li_ops->iop_format(lip, lv); insert: - xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs); + xfs_cil_prepare_item(log, lv, old_lv, diff_len); } } @@ -458,7 +450,6 @@ xlog_cil_insert_items( struct xfs_cil_ctx *ctx = cil->xc_ctx; struct xfs_log_item *lip; int len = 0; - int diff_iovecs = 0; int iclog_space; int iovhdr_res = 0, split_res = 0, ctx_res = 0; @@ -468,7 +459,7 @@ xlog_cil_insert_items( * We can do this safely because the context can't checkpoint until we * are done so it doesn't matter exactly how we update the CIL. */ - xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs); + xlog_cil_insert_format_items(log, tp, &len); spin_lock(&cil->xc_cil_lock); @@ -503,7 +494,6 @@ xlog_cil_insert_items( } tp->t_ticket->t_curr_res -= len; ctx->space_used += len; - ctx->nvecs += diff_iovecs; /* * If we've overrun the reservation, dump the tx details before we move diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b3d5ee85dd7e..4b7303ca1741 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -221,7 +221,6 @@ struct xfs_cil_ctx { xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ struct xlog_in_core *commit_iclog; struct xlog_ticket *ticket; /* chkpt ticket */ - int nvecs; /* number of regions */ int space_used; /* aggregate size of regions */ struct list_head busy_extents; /* busy extents in chkpt */ struct xfs_log_vec *lv_chain; /* logvecs being pushed */