forked from Minki/linux
Merge branch 'xfs-free-inode-btree' into for-next
This commit is contained in:
commit
b76769294b
@ -160,30 +160,38 @@ typedef struct xfs_agi {
|
||||
* still being referenced.
|
||||
*/
|
||||
__be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
|
||||
|
||||
/*
|
||||
* This marks the end of logging region 1 and start of logging region 2.
|
||||
*/
|
||||
uuid_t agi_uuid; /* uuid of filesystem */
|
||||
__be32 agi_crc; /* crc of agi sector */
|
||||
__be32 agi_pad32;
|
||||
__be64 agi_lsn; /* last write sequence */
|
||||
|
||||
__be32 agi_free_root; /* root of the free inode btree */
|
||||
__be32 agi_free_level;/* levels in free inode btree */
|
||||
|
||||
/* structure must be padded to 64 bit alignment */
|
||||
} xfs_agi_t;
|
||||
|
||||
#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
|
||||
|
||||
#define XFS_AGI_MAGICNUM 0x00000001
|
||||
#define XFS_AGI_VERSIONNUM 0x00000002
|
||||
#define XFS_AGI_SEQNO 0x00000004
|
||||
#define XFS_AGI_LENGTH 0x00000008
|
||||
#define XFS_AGI_COUNT 0x00000010
|
||||
#define XFS_AGI_ROOT 0x00000020
|
||||
#define XFS_AGI_LEVEL 0x00000040
|
||||
#define XFS_AGI_FREECOUNT 0x00000080
|
||||
#define XFS_AGI_NEWINO 0x00000100
|
||||
#define XFS_AGI_DIRINO 0x00000200
|
||||
#define XFS_AGI_UNLINKED 0x00000400
|
||||
#define XFS_AGI_NUM_BITS 11
|
||||
#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1)
|
||||
#define XFS_AGI_MAGICNUM (1 << 0)
|
||||
#define XFS_AGI_VERSIONNUM (1 << 1)
|
||||
#define XFS_AGI_SEQNO (1 << 2)
|
||||
#define XFS_AGI_LENGTH (1 << 3)
|
||||
#define XFS_AGI_COUNT (1 << 4)
|
||||
#define XFS_AGI_ROOT (1 << 5)
|
||||
#define XFS_AGI_LEVEL (1 << 6)
|
||||
#define XFS_AGI_FREECOUNT (1 << 7)
|
||||
#define XFS_AGI_NEWINO (1 << 8)
|
||||
#define XFS_AGI_DIRINO (1 << 9)
|
||||
#define XFS_AGI_UNLINKED (1 << 10)
|
||||
#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */
|
||||
#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1)
|
||||
#define XFS_AGI_FREE_ROOT (1 << 11)
|
||||
#define XFS_AGI_FREE_LEVEL (1 << 12)
|
||||
#define XFS_AGI_NUM_BITS_R2 13
|
||||
|
||||
/* disk block (xfs_daddr_t) in the AG */
|
||||
#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
|
||||
|
@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone;
|
||||
* Btree magic numbers.
|
||||
*/
|
||||
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
|
||||
{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
|
||||
{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
|
||||
XFS_FIBT_MAGIC },
|
||||
{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
|
||||
XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
|
||||
XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
|
||||
};
|
||||
#define xfs_btree_magic(cur) \
|
||||
xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
|
||||
@ -1115,6 +1116,7 @@ xfs_btree_set_refs(
|
||||
xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
|
||||
break;
|
||||
case XFS_BTNUM_INO:
|
||||
case XFS_BTNUM_FINO:
|
||||
xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
|
||||
break;
|
||||
case XFS_BTNUM_BMAP:
|
||||
|
@ -62,6 +62,7 @@ union xfs_btree_rec {
|
||||
#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
|
||||
#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
|
||||
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
|
||||
#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
|
||||
|
||||
/*
|
||||
* For logging record fields.
|
||||
@ -92,6 +93,7 @@ do { \
|
||||
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
|
||||
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
|
||||
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
|
||||
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
|
||||
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
|
||||
} \
|
||||
} while (0)
|
||||
@ -105,6 +107,7 @@ do { \
|
||||
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
|
||||
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
|
||||
case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
|
||||
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
|
||||
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
@ -202,6 +202,8 @@ typedef __be32 xfs_alloc_ptr_t;
|
||||
*/
|
||||
#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
|
||||
#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
|
||||
#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */
|
||||
#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */
|
||||
|
||||
typedef __uint64_t xfs_inofree_t;
|
||||
#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
|
||||
@ -244,7 +246,17 @@ typedef __be32 xfs_inobt_ptr_t;
|
||||
* block numbers in the AG.
|
||||
*/
|
||||
#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
|
||||
#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
|
||||
#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
|
||||
|
||||
/*
|
||||
* The first data block of an AG depends on whether the filesystem was formatted
|
||||
* with the finobt feature. If so, account for the finobt reserved root btree
|
||||
* block.
|
||||
*/
|
||||
#define XFS_PREALLOC_BLOCKS(mp) \
|
||||
(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
|
||||
XFS_FIBT_BLOCK(mp) + 1 : \
|
||||
XFS_IBT_BLOCK(mp) + 1)
|
||||
|
||||
|
||||
|
||||
|
@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks {
|
||||
#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
|
||||
#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
|
||||
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
|
||||
#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
|
||||
|
||||
/*
|
||||
* Minimum and maximum sizes needed for growth checks.
|
||||
|
@ -104,7 +104,9 @@ xfs_fs_geometry(
|
||||
(xfs_sb_version_hascrc(&mp->m_sb) ?
|
||||
XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
|
||||
(xfs_sb_version_hasftype(&mp->m_sb) ?
|
||||
XFS_FSOP_GEOM_FLAGS_FTYPE : 0);
|
||||
XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
|
||||
(xfs_sb_version_hasfinobt(&mp->m_sb) ?
|
||||
XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
|
||||
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
|
||||
mp->m_sb.sb_logsectsize : BBSIZE;
|
||||
geo->rtsectsize = mp->m_sb.sb_blocksize;
|
||||
@ -316,6 +318,10 @@ xfs_growfs_data_private(
|
||||
agi->agi_dirino = cpu_to_be32(NULLAGINO);
|
||||
if (xfs_sb_version_hascrc(&mp->m_sb))
|
||||
uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
|
||||
if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
|
||||
agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
|
||||
agi->agi_free_level = cpu_to_be32(1);
|
||||
}
|
||||
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
|
||||
agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
|
||||
|
||||
@ -407,6 +413,34 @@ xfs_growfs_data_private(
|
||||
xfs_buf_relse(bp);
|
||||
if (error)
|
||||
goto error0;
|
||||
|
||||
/*
|
||||
* FINO btree root block
|
||||
*/
|
||||
if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
|
||||
bp = xfs_growfs_get_hdr_buf(mp,
|
||||
XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
|
||||
BTOBB(mp->m_sb.sb_blocksize), 0,
|
||||
&xfs_inobt_buf_ops);
|
||||
if (!bp) {
|
||||
error = ENOMEM;
|
||||
goto error0;
|
||||
}
|
||||
|
||||
if (xfs_sb_version_hascrc(&mp->m_sb))
|
||||
xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
|
||||
0, 0, agno,
|
||||
XFS_BTREE_CRC_BLOCKS);
|
||||
else
|
||||
xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
|
||||
0, agno, 0);
|
||||
|
||||
error = xfs_bwrite(bp);
|
||||
xfs_buf_relse(bp);
|
||||
if (error)
|
||||
goto error0;
|
||||
}
|
||||
|
||||
}
|
||||
xfs_trans_agblocks_delta(tp, nfree);
|
||||
/*
|
||||
|
@ -111,6 +111,66 @@ xfs_inobt_get_rec(
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert a single inobt record. Cursor must already point to desired location.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_inobt_insert_rec(
|
||||
struct xfs_btree_cur *cur,
|
||||
__int32_t freecount,
|
||||
xfs_inofree_t free,
|
||||
int *stat)
|
||||
{
|
||||
cur->bc_rec.i.ir_freecount = freecount;
|
||||
cur->bc_rec.i.ir_free = free;
|
||||
return xfs_btree_insert(cur, stat);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert records describing a newly allocated inode chunk into the inobt.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_inobt_insert(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *agbp,
|
||||
xfs_agino_t newino,
|
||||
xfs_agino_t newlen,
|
||||
xfs_btnum_t btnum)
|
||||
{
|
||||
struct xfs_btree_cur *cur;
|
||||
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
||||
xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
||||
xfs_agino_t thisino;
|
||||
int i;
|
||||
int error;
|
||||
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
|
||||
|
||||
for (thisino = newino;
|
||||
thisino < newino + newlen;
|
||||
thisino += XFS_INODES_PER_CHUNK) {
|
||||
error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
|
||||
if (error) {
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
||||
return error;
|
||||
}
|
||||
ASSERT(i == 0);
|
||||
|
||||
error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
|
||||
XFS_INOBT_ALL_FREE, &i);
|
||||
if (error) {
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
||||
return error;
|
||||
}
|
||||
ASSERT(i == 1);
|
||||
}
|
||||
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that the number of free inodes in the AGI is correct.
|
||||
*/
|
||||
@ -303,13 +363,10 @@ xfs_ialloc_ag_alloc(
|
||||
{
|
||||
xfs_agi_t *agi; /* allocation group header */
|
||||
xfs_alloc_arg_t args; /* allocation argument structure */
|
||||
xfs_btree_cur_t *cur; /* inode btree cursor */
|
||||
xfs_agnumber_t agno;
|
||||
int error;
|
||||
int i;
|
||||
xfs_agino_t newino; /* new first inode's number */
|
||||
xfs_agino_t newlen; /* new number of inodes */
|
||||
xfs_agino_t thisino; /* current inode number, for loop */
|
||||
int isaligned = 0; /* inode allocation at stripe unit */
|
||||
/* boundary */
|
||||
struct xfs_perag *pag;
|
||||
@ -459,29 +516,19 @@ xfs_ialloc_ag_alloc(
|
||||
agi->agi_newino = cpu_to_be32(newino);
|
||||
|
||||
/*
|
||||
* Insert records describing the new inode chunk into the btree.
|
||||
* Insert records describing the new inode chunk into the btrees.
|
||||
*/
|
||||
cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
|
||||
for (thisino = newino;
|
||||
thisino < newino + newlen;
|
||||
thisino += XFS_INODES_PER_CHUNK) {
|
||||
cur->bc_rec.i.ir_startino = thisino;
|
||||
cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
|
||||
cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
|
||||
error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
|
||||
if (error) {
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
||||
error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
|
||||
XFS_BTNUM_INO);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
|
||||
error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
|
||||
XFS_BTNUM_FINO);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
ASSERT(i == 0);
|
||||
error = xfs_btree_insert(cur, &i);
|
||||
if (error) {
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
||||
return error;
|
||||
}
|
||||
ASSERT(i == 1);
|
||||
}
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
||||
/*
|
||||
* Log allocation group header fields
|
||||
*/
|
||||
@ -675,13 +722,10 @@ xfs_ialloc_get_rec(
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an inode.
|
||||
*
|
||||
* The caller selected an AG for us, and made sure that free inodes are
|
||||
* available.
|
||||
* Allocate an inode using the inobt-only algorithm.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_dialloc_ag(
|
||||
xfs_dialloc_ag_inobt(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *agbp,
|
||||
xfs_ino_t parent,
|
||||
@ -707,7 +751,7 @@ xfs_dialloc_ag(
|
||||
ASSERT(pag->pagi_freecount > 0);
|
||||
|
||||
restart_pagno:
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
|
||||
/*
|
||||
* If pagino is 0 (this is the root inode allocation) use newino.
|
||||
* This must work because we've just allocated some.
|
||||
@ -939,6 +983,294 @@ error0:
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the free inode btree to allocate an inode based on distance from the
|
||||
* parent. Note that the provided cursor may be deleted and replaced.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_dialloc_ag_finobt_near(
|
||||
xfs_agino_t pagino,
|
||||
struct xfs_btree_cur **ocur,
|
||||
struct xfs_inobt_rec_incore *rec)
|
||||
{
|
||||
struct xfs_btree_cur *lcur = *ocur; /* left search cursor */
|
||||
struct xfs_btree_cur *rcur; /* right search cursor */
|
||||
struct xfs_inobt_rec_incore rrec;
|
||||
int error;
|
||||
int i, j;
|
||||
|
||||
error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (i == 1) {
|
||||
error = xfs_inobt_get_rec(lcur, rec, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(i == 1);
|
||||
|
||||
/*
|
||||
* See if we've landed in the parent inode record. The finobt
|
||||
* only tracks chunks with at least one free inode, so record
|
||||
* existence is enough.
|
||||
*/
|
||||
if (pagino >= rec->ir_startino &&
|
||||
pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
|
||||
return 0;
|
||||
}
|
||||
|
||||
error = xfs_btree_dup_cursor(lcur, &rcur);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
|
||||
if (error)
|
||||
goto error_rcur;
|
||||
if (j == 1) {
|
||||
error = xfs_inobt_get_rec(rcur, &rrec, &j);
|
||||
if (error)
|
||||
goto error_rcur;
|
||||
XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
|
||||
}
|
||||
|
||||
XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
|
||||
if (i == 1 && j == 1) {
|
||||
/*
|
||||
* Both the left and right records are valid. Choose the closer
|
||||
* inode chunk to the target.
|
||||
*/
|
||||
if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
|
||||
(rrec.ir_startino - pagino)) {
|
||||
*rec = rrec;
|
||||
xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
|
||||
*ocur = rcur;
|
||||
} else {
|
||||
xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
|
||||
}
|
||||
} else if (j == 1) {
|
||||
/* only the right record is valid */
|
||||
*rec = rrec;
|
||||
xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
|
||||
*ocur = rcur;
|
||||
} else if (i == 1) {
|
||||
/* only the left record is valid */
|
||||
xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
error_rcur:
|
||||
xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the free inode btree to find a free inode based on a newino hint. If
|
||||
* the hint is NULL, find the first free inode in the AG.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_dialloc_ag_finobt_newino(
|
||||
struct xfs_agi *agi,
|
||||
struct xfs_btree_cur *cur,
|
||||
struct xfs_inobt_rec_incore *rec)
|
||||
{
|
||||
int error;
|
||||
int i;
|
||||
|
||||
if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
|
||||
error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ,
|
||||
&i);
|
||||
if (error)
|
||||
return error;
|
||||
if (i == 1) {
|
||||
error = xfs_inobt_get_rec(cur, rec, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(i == 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the first inode available in the AG.
|
||||
*/
|
||||
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(i == 1);
|
||||
|
||||
error = xfs_inobt_get_rec(cur, rec, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(i == 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the inobt based on a modification made to the finobt. Also ensure that
|
||||
* the records from both trees are equivalent post-modification.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_dialloc_ag_update_inobt(
|
||||
struct xfs_btree_cur *cur, /* inobt cursor */
|
||||
struct xfs_inobt_rec_incore *frec, /* finobt record */
|
||||
int offset) /* inode offset */
|
||||
{
|
||||
struct xfs_inobt_rec_incore rec;
|
||||
int error;
|
||||
int i;
|
||||
|
||||
error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(i == 1);
|
||||
|
||||
error = xfs_inobt_get_rec(cur, &rec, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(i == 1);
|
||||
ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
|
||||
XFS_INODES_PER_CHUNK) == 0);
|
||||
|
||||
rec.ir_free &= ~XFS_INOBT_MASK(offset);
|
||||
rec.ir_freecount--;
|
||||
|
||||
XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
|
||||
(rec.ir_freecount == frec->ir_freecount));
|
||||
|
||||
error = xfs_inobt_update(cur, &rec);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an inode using the free inode btree, if available. Otherwise, fall
|
||||
* back to the inobt search algorithm.
|
||||
*
|
||||
* The caller selected an AG for us, and made sure that free inodes are
|
||||
* available.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_dialloc_ag(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *agbp,
|
||||
xfs_ino_t parent,
|
||||
xfs_ino_t *inop)
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
||||
xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
||||
xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
|
||||
xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
|
||||
struct xfs_perag *pag;
|
||||
struct xfs_btree_cur *cur; /* finobt cursor */
|
||||
struct xfs_btree_cur *icur; /* inobt cursor */
|
||||
struct xfs_inobt_rec_incore rec;
|
||||
xfs_ino_t ino;
|
||||
int error;
|
||||
int offset;
|
||||
int i;
|
||||
|
||||
if (!xfs_sb_version_hasfinobt(&mp->m_sb))
|
||||
return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);
|
||||
|
||||
pag = xfs_perag_get(mp, agno);
|
||||
|
||||
/*
|
||||
* If pagino is 0 (this is the root inode allocation) use newino.
|
||||
* This must work because we've just allocated some.
|
||||
*/
|
||||
if (!pagino)
|
||||
pagino = be32_to_cpu(agi->agi_newino);
|
||||
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
|
||||
|
||||
error = xfs_check_agi_freecount(cur, agi);
|
||||
if (error)
|
||||
goto error_cur;
|
||||
|
||||
/*
|
||||
* The search algorithm depends on whether we're in the same AG as the
|
||||
* parent. If so, find the closest available inode to the parent. If
|
||||
* not, consider the agi hint or find the first free inode in the AG.
|
||||
*/
|
||||
if (agno == pagno)
|
||||
error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
|
||||
else
|
||||
error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
|
||||
if (error)
|
||||
goto error_cur;
|
||||
|
||||
offset = xfs_lowbit64(rec.ir_free);
|
||||
ASSERT(offset >= 0);
|
||||
ASSERT(offset < XFS_INODES_PER_CHUNK);
|
||||
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
|
||||
XFS_INODES_PER_CHUNK) == 0);
|
||||
ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
|
||||
|
||||
/*
|
||||
* Modify or remove the finobt record.
|
||||
*/
|
||||
rec.ir_free &= ~XFS_INOBT_MASK(offset);
|
||||
rec.ir_freecount--;
|
||||
if (rec.ir_freecount)
|
||||
error = xfs_inobt_update(cur, &rec);
|
||||
else
|
||||
error = xfs_btree_delete(cur, &i);
|
||||
if (error)
|
||||
goto error_cur;
|
||||
|
||||
/*
|
||||
* The finobt has now been updated appropriately. We haven't updated the
|
||||
* agi and superblock yet, so we can create an inobt cursor and validate
|
||||
* the original freecount. If all is well, make the equivalent update to
|
||||
* the inobt using the finobt record and offset information.
|
||||
*/
|
||||
icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
|
||||
|
||||
error = xfs_check_agi_freecount(icur, agi);
|
||||
if (error)
|
||||
goto error_icur;
|
||||
|
||||
error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
|
||||
if (error)
|
||||
goto error_icur;
|
||||
|
||||
/*
|
||||
* Both trees have now been updated. We must update the perag and
|
||||
* superblock before we can check the freecount for each btree.
|
||||
*/
|
||||
be32_add_cpu(&agi->agi_freecount, -1);
|
||||
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
|
||||
pag->pagi_freecount--;
|
||||
|
||||
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
|
||||
|
||||
error = xfs_check_agi_freecount(icur, agi);
|
||||
if (error)
|
||||
goto error_icur;
|
||||
error = xfs_check_agi_freecount(cur, agi);
|
||||
if (error)
|
||||
goto error_icur;
|
||||
|
||||
xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
||||
xfs_perag_put(pag);
|
||||
*inop = ino;
|
||||
return 0;
|
||||
|
||||
error_icur:
|
||||
xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
|
||||
error_cur:
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
||||
xfs_perag_put(pag);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an inode on disk.
|
||||
*
|
||||
@ -1098,78 +1430,34 @@ out_error:
|
||||
return XFS_ERROR(error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free disk inode. Carefully avoids touching the incore inode, all
|
||||
* manipulations incore are the caller's responsibility.
|
||||
* The on-disk inode is not changed by this operation, only the
|
||||
* btree (free inode mask) is changed.
|
||||
*/
|
||||
int
|
||||
xfs_difree(
|
||||
xfs_trans_t *tp, /* transaction pointer */
|
||||
xfs_ino_t inode, /* inode to be freed */
|
||||
xfs_bmap_free_t *flist, /* extents to free */
|
||||
int *delete, /* set if inode cluster was deleted */
|
||||
xfs_ino_t *first_ino) /* first inode in deleted cluster */
|
||||
STATIC int
|
||||
xfs_difree_inobt(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *agbp,
|
||||
xfs_agino_t agino,
|
||||
struct xfs_bmap_free *flist,
|
||||
int *delete,
|
||||
xfs_ino_t *first_ino,
|
||||
struct xfs_inobt_rec_incore *orec)
|
||||
{
|
||||
/* REFERENCED */
|
||||
xfs_agblock_t agbno; /* block number containing inode */
|
||||
xfs_buf_t *agbp; /* buffer containing allocation group header */
|
||||
xfs_agino_t agino; /* inode number relative to allocation group */
|
||||
xfs_agnumber_t agno; /* allocation group number */
|
||||
xfs_agi_t *agi; /* allocation group header */
|
||||
xfs_btree_cur_t *cur; /* inode btree cursor */
|
||||
int error; /* error return value */
|
||||
int i; /* result code */
|
||||
int ilen; /* inodes in an inode cluster */
|
||||
xfs_mount_t *mp; /* mount structure for filesystem */
|
||||
int off; /* offset of inode in inode chunk */
|
||||
xfs_inobt_rec_incore_t rec; /* btree record */
|
||||
struct xfs_perag *pag;
|
||||
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
||||
xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
||||
struct xfs_perag *pag;
|
||||
struct xfs_btree_cur *cur;
|
||||
struct xfs_inobt_rec_incore rec;
|
||||
int ilen;
|
||||
int error;
|
||||
int i;
|
||||
int off;
|
||||
|
||||
mp = tp->t_mountp;
|
||||
|
||||
/*
|
||||
* Break up inode number into its components.
|
||||
*/
|
||||
agno = XFS_INO_TO_AGNO(mp, inode);
|
||||
if (agno >= mp->m_sb.sb_agcount) {
|
||||
xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
|
||||
__func__, agno, mp->m_sb.sb_agcount);
|
||||
ASSERT(0);
|
||||
return XFS_ERROR(EINVAL);
|
||||
}
|
||||
agino = XFS_INO_TO_AGINO(mp, inode);
|
||||
if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
|
||||
xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
|
||||
__func__, (unsigned long long)inode,
|
||||
(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
|
||||
ASSERT(0);
|
||||
return XFS_ERROR(EINVAL);
|
||||
}
|
||||
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
|
||||
if (agbno >= mp->m_sb.sb_agblocks) {
|
||||
xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
|
||||
__func__, agbno, mp->m_sb.sb_agblocks);
|
||||
ASSERT(0);
|
||||
return XFS_ERROR(EINVAL);
|
||||
}
|
||||
/*
|
||||
* Get the allocation group header.
|
||||
*/
|
||||
error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
|
||||
if (error) {
|
||||
xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
|
||||
__func__, error);
|
||||
return error;
|
||||
}
|
||||
agi = XFS_BUF_TO_AGI(agbp);
|
||||
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
|
||||
ASSERT(agbno < be32_to_cpu(agi->agi_length));
|
||||
ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
|
||||
|
||||
/*
|
||||
* Initialize the cursor.
|
||||
*/
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
|
||||
|
||||
error = xfs_check_agi_freecount(cur, agi);
|
||||
if (error)
|
||||
@ -1261,6 +1549,7 @@ xfs_difree(
|
||||
if (error)
|
||||
goto error0;
|
||||
|
||||
*orec = rec;
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
||||
return 0;
|
||||
|
||||
@ -1269,6 +1558,182 @@ error0:
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an inode in the free inode btree.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_difree_finobt(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *agbp,
|
||||
xfs_agino_t agino,
|
||||
struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
|
||||
{
|
||||
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
||||
xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
||||
struct xfs_btree_cur *cur;
|
||||
struct xfs_inobt_rec_incore rec;
|
||||
int offset = agino - ibtrec->ir_startino;
|
||||
int error;
|
||||
int i;
|
||||
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
|
||||
|
||||
error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
|
||||
if (error)
|
||||
goto error;
|
||||
if (i == 0) {
|
||||
/*
|
||||
* If the record does not exist in the finobt, we must have just
|
||||
* freed an inode in a previously fully allocated chunk. If not,
|
||||
* something is out of sync.
|
||||
*/
|
||||
XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
|
||||
|
||||
error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
|
||||
ibtrec->ir_free, &i);
|
||||
if (error)
|
||||
goto error;
|
||||
ASSERT(i == 1);
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read and update the existing record. We could just copy the ibtrec
|
||||
* across here, but that would defeat the purpose of having redundant
|
||||
* metadata. By making the modifications independently, we can catch
|
||||
* corruptions that we wouldn't see if we just copied from one record
|
||||
* to another.
|
||||
*/
|
||||
error = xfs_inobt_get_rec(cur, &rec, &i);
|
||||
if (error)
|
||||
goto error;
|
||||
XFS_WANT_CORRUPTED_GOTO(i == 1, error);
|
||||
|
||||
rec.ir_free |= XFS_INOBT_MASK(offset);
|
||||
rec.ir_freecount++;
|
||||
|
||||
XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
|
||||
(rec.ir_freecount == ibtrec->ir_freecount),
|
||||
error);
|
||||
|
||||
/*
|
||||
* The content of inobt records should always match between the inobt
|
||||
* and finobt. The lifecycle of records in the finobt is different from
|
||||
* the inobt in that the finobt only tracks records with at least one
|
||||
* free inode. Hence, if all of the inodes are free and we aren't
|
||||
* keeping inode chunks permanently on disk, remove the record.
|
||||
* Otherwise, update the record with the new information.
|
||||
*/
|
||||
if (rec.ir_freecount == mp->m_ialloc_inos &&
|
||||
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
|
||||
error = xfs_btree_delete(cur, &i);
|
||||
if (error)
|
||||
goto error;
|
||||
ASSERT(i == 1);
|
||||
} else {
|
||||
error = xfs_inobt_update(cur, &rec);
|
||||
if (error)
|
||||
goto error;
|
||||
}
|
||||
|
||||
out:
|
||||
error = xfs_check_agi_freecount(cur, agi);
|
||||
if (error)
|
||||
goto error;
|
||||
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
||||
return 0;
|
||||
|
||||
error:
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free disk inode. Carefully avoids touching the incore inode, all
|
||||
* manipulations incore are the caller's responsibility.
|
||||
* The on-disk inode is not changed by this operation, only the
|
||||
* btree (free inode mask) is changed.
|
||||
*/
|
||||
int
|
||||
xfs_difree(
|
||||
struct xfs_trans *tp, /* transaction pointer */
|
||||
xfs_ino_t inode, /* inode to be freed */
|
||||
struct xfs_bmap_free *flist, /* extents to free */
|
||||
int *delete,/* set if inode cluster was deleted */
|
||||
xfs_ino_t *first_ino)/* first inode in deleted cluster */
|
||||
{
|
||||
/* REFERENCED */
|
||||
xfs_agblock_t agbno; /* block number containing inode */
|
||||
struct xfs_buf *agbp; /* buffer for allocation group header */
|
||||
xfs_agino_t agino; /* allocation group inode number */
|
||||
xfs_agnumber_t agno; /* allocation group number */
|
||||
int error; /* error return value */
|
||||
struct xfs_mount *mp; /* mount structure for filesystem */
|
||||
struct xfs_inobt_rec_incore rec;/* btree record */
|
||||
|
||||
mp = tp->t_mountp;
|
||||
|
||||
/*
|
||||
* Break up inode number into its components.
|
||||
*/
|
||||
agno = XFS_INO_TO_AGNO(mp, inode);
|
||||
if (agno >= mp->m_sb.sb_agcount) {
|
||||
xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
|
||||
__func__, agno, mp->m_sb.sb_agcount);
|
||||
ASSERT(0);
|
||||
return XFS_ERROR(EINVAL);
|
||||
}
|
||||
agino = XFS_INO_TO_AGINO(mp, inode);
|
||||
if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
|
||||
xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
|
||||
__func__, (unsigned long long)inode,
|
||||
(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
|
||||
ASSERT(0);
|
||||
return XFS_ERROR(EINVAL);
|
||||
}
|
||||
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
|
||||
if (agbno >= mp->m_sb.sb_agblocks) {
|
||||
xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
|
||||
__func__, agbno, mp->m_sb.sb_agblocks);
|
||||
ASSERT(0);
|
||||
return XFS_ERROR(EINVAL);
|
||||
}
|
||||
/*
|
||||
* Get the allocation group header.
|
||||
*/
|
||||
error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
|
||||
if (error) {
|
||||
xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
|
||||
__func__, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fix up the inode allocation btree.
|
||||
*/
|
||||
error = xfs_difree_inobt(mp, tp, agbp, agino, flist, delete, first_ino,
|
||||
&rec);
|
||||
if (error)
|
||||
goto error0;
|
||||
|
||||
/*
|
||||
* Fix up the free inode btree.
|
||||
*/
|
||||
if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
|
||||
error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
|
||||
if (error)
|
||||
goto error0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
error0:
|
||||
return error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_imap_lookup(
|
||||
struct xfs_mount *mp,
|
||||
@ -1300,7 +1765,7 @@ xfs_imap_lookup(
|
||||
* we have a record, we need to ensure it contains the inode number
|
||||
* we are looking up.
|
||||
*/
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
|
||||
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
|
||||
if (!error) {
|
||||
if (i)
|
||||
@ -1488,7 +1953,16 @@ xfs_ialloc_compute_maxlevels(
|
||||
}
|
||||
|
||||
/*
|
||||
* Log specified fields for the ag hdr (inode section)
|
||||
* Log specified fields for the ag hdr (inode section). The growth of the agi
|
||||
* structure over time requires that we interpret the buffer as two logical
|
||||
* regions delineated by the end of the unlinked list. This is due to the size
|
||||
* of the hash table and its location in the middle of the agi.
|
||||
*
|
||||
* For example, a request to log a field before agi_unlinked and a field after
|
||||
* agi_unlinked could cause us to log the entire hash table and use an excessive
|
||||
* amount of log space. To avoid this behavior, log the region up through
|
||||
* agi_unlinked in one call and the region after agi_unlinked through the end of
|
||||
* the structure in another.
|
||||
*/
|
||||
void
|
||||
xfs_ialloc_log_agi(
|
||||
@ -1511,6 +1985,8 @@ xfs_ialloc_log_agi(
|
||||
offsetof(xfs_agi_t, agi_newino),
|
||||
offsetof(xfs_agi_t, agi_dirino),
|
||||
offsetof(xfs_agi_t, agi_unlinked),
|
||||
offsetof(xfs_agi_t, agi_free_root),
|
||||
offsetof(xfs_agi_t, agi_free_level),
|
||||
sizeof(xfs_agi_t)
|
||||
};
|
||||
#ifdef DEBUG
|
||||
@ -1519,15 +1995,30 @@ xfs_ialloc_log_agi(
|
||||
agi = XFS_BUF_TO_AGI(bp);
|
||||
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
|
||||
#endif
|
||||
/*
|
||||
* Compute byte offsets for the first and last fields.
|
||||
*/
|
||||
xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
|
||||
/*
|
||||
* Log the allocation group inode header buffer.
|
||||
*/
|
||||
|
||||
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
|
||||
xfs_trans_log_buf(tp, bp, first, last);
|
||||
|
||||
/*
|
||||
* Compute byte offsets for the first and last fields in the first
|
||||
* region and log the agi buffer. This only logs up through
|
||||
* agi_unlinked.
|
||||
*/
|
||||
if (fields & XFS_AGI_ALL_BITS_R1) {
|
||||
xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
|
||||
&first, &last);
|
||||
xfs_trans_log_buf(tp, bp, first, last);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mask off the bits in the first region and calculate the first and
|
||||
* last field offsets for any bits in the second region.
|
||||
*/
|
||||
fields &= ~XFS_AGI_ALL_BITS_R1;
|
||||
if (fields) {
|
||||
xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
|
||||
&first, &last);
|
||||
xfs_trans_log_buf(tp, bp, first, last);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
|
@ -49,7 +49,8 @@ xfs_inobt_dup_cursor(
|
||||
struct xfs_btree_cur *cur)
|
||||
{
|
||||
return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
|
||||
cur->bc_private.a.agbp, cur->bc_private.a.agno);
|
||||
cur->bc_private.a.agbp, cur->bc_private.a.agno,
|
||||
cur->bc_btnum);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
@ -66,6 +67,21 @@ xfs_inobt_set_root(
|
||||
xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_finobt_set_root(
|
||||
struct xfs_btree_cur *cur,
|
||||
union xfs_btree_ptr *nptr,
|
||||
int inc) /* level change */
|
||||
{
|
||||
struct xfs_buf *agbp = cur->bc_private.a.agbp;
|
||||
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
||||
|
||||
agi->agi_free_root = nptr->s;
|
||||
be32_add_cpu(&agi->agi_free_level, inc);
|
||||
xfs_ialloc_log_agi(cur->bc_tp, agbp,
|
||||
XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_inobt_alloc_block(
|
||||
struct xfs_btree_cur *cur,
|
||||
@ -172,6 +188,17 @@ xfs_inobt_init_ptr_from_cur(
|
||||
ptr->s = agi->agi_root;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_finobt_init_ptr_from_cur(
|
||||
struct xfs_btree_cur *cur,
|
||||
union xfs_btree_ptr *ptr)
|
||||
{
|
||||
struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
|
||||
|
||||
ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
|
||||
ptr->s = agi->agi_free_root;
|
||||
}
|
||||
|
||||
STATIC __int64_t
|
||||
xfs_inobt_key_diff(
|
||||
struct xfs_btree_cur *cur,
|
||||
@ -202,6 +229,7 @@ xfs_inobt_verify(
|
||||
*/
|
||||
switch (block->bb_magic) {
|
||||
case cpu_to_be32(XFS_IBT_CRC_MAGIC):
|
||||
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
|
||||
if (!xfs_sb_version_hascrc(&mp->m_sb))
|
||||
return false;
|
||||
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
|
||||
@ -213,6 +241,7 @@ xfs_inobt_verify(
|
||||
return false;
|
||||
/* fall through */
|
||||
case cpu_to_be32(XFS_IBT_MAGIC):
|
||||
case cpu_to_be32(XFS_FIBT_MAGIC):
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
@ -316,6 +345,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
|
||||
#endif
|
||||
};
|
||||
|
||||
static const struct xfs_btree_ops xfs_finobt_ops = {
|
||||
.rec_len = sizeof(xfs_inobt_rec_t),
|
||||
.key_len = sizeof(xfs_inobt_key_t),
|
||||
|
||||
.dup_cursor = xfs_inobt_dup_cursor,
|
||||
.set_root = xfs_finobt_set_root,
|
||||
.alloc_block = xfs_inobt_alloc_block,
|
||||
.free_block = xfs_inobt_free_block,
|
||||
.get_minrecs = xfs_inobt_get_minrecs,
|
||||
.get_maxrecs = xfs_inobt_get_maxrecs,
|
||||
.init_key_from_rec = xfs_inobt_init_key_from_rec,
|
||||
.init_rec_from_key = xfs_inobt_init_rec_from_key,
|
||||
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
|
||||
.init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
|
||||
.key_diff = xfs_inobt_key_diff,
|
||||
.buf_ops = &xfs_inobt_buf_ops,
|
||||
#if defined(DEBUG) || defined(XFS_WARN)
|
||||
.keys_inorder = xfs_inobt_keys_inorder,
|
||||
.recs_inorder = xfs_inobt_recs_inorder,
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Allocate a new inode btree cursor.
|
||||
*/
|
||||
@ -324,7 +375,8 @@ xfs_inobt_init_cursor(
|
||||
struct xfs_mount *mp, /* file system mount point */
|
||||
struct xfs_trans *tp, /* transaction pointer */
|
||||
struct xfs_buf *agbp, /* buffer for agi structure */
|
||||
xfs_agnumber_t agno) /* allocation group number */
|
||||
xfs_agnumber_t agno, /* allocation group number */
|
||||
xfs_btnum_t btnum) /* ialloc or free ino btree */
|
||||
{
|
||||
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
||||
struct xfs_btree_cur *cur;
|
||||
@ -333,11 +385,17 @@ xfs_inobt_init_cursor(
|
||||
|
||||
cur->bc_tp = tp;
|
||||
cur->bc_mp = mp;
|
||||
cur->bc_nlevels = be32_to_cpu(agi->agi_level);
|
||||
cur->bc_btnum = XFS_BTNUM_INO;
|
||||
cur->bc_btnum = btnum;
|
||||
if (btnum == XFS_BTNUM_INO) {
|
||||
cur->bc_nlevels = be32_to_cpu(agi->agi_level);
|
||||
cur->bc_ops = &xfs_inobt_ops;
|
||||
} else {
|
||||
cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
|
||||
cur->bc_ops = &xfs_finobt_ops;
|
||||
}
|
||||
|
||||
cur->bc_blocklog = mp->m_sb.sb_blocklog;
|
||||
|
||||
cur->bc_ops = &xfs_inobt_ops;
|
||||
if (xfs_sb_version_hascrc(&mp->m_sb))
|
||||
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
|
||||
|
||||
|
@ -58,7 +58,8 @@ struct xfs_mount;
|
||||
((index) - 1) * sizeof(xfs_inobt_ptr_t)))
|
||||
|
||||
extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
|
||||
struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
|
||||
struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t,
|
||||
xfs_btnum_t);
|
||||
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
|
||||
|
||||
#endif /* __XFS_IALLOC_BTREE_H__ */
|
||||
|
@ -1811,9 +1811,33 @@ xfs_inactive_ifree(
|
||||
int error;
|
||||
|
||||
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
|
||||
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
|
||||
|
||||
/*
|
||||
* The ifree transaction might need to allocate blocks for record
|
||||
* insertion to the finobt. We don't want to fail here at ENOSPC, so
|
||||
* allow ifree to dip into the reserved block pool if necessary.
|
||||
*
|
||||
* Freeing large sets of inodes generally means freeing inode chunks,
|
||||
* directory and file data blocks, so this should be relatively safe.
|
||||
* Only under severe circumstances should it be possible to free enough
|
||||
* inodes to exhaust the reserve block pool via finobt expansion while
|
||||
* at the same time not creating free space in the filesystem.
|
||||
*
|
||||
* Send a warning if the reservation does happen to fail, as the inode
|
||||
* now remains allocated and sits on the unlinked list until the fs is
|
||||
* repaired.
|
||||
*/
|
||||
tp->t_flags |= XFS_TRANS_RESERVE;
|
||||
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
|
||||
XFS_IFREE_SPACE_RES(mp), 0);
|
||||
if (error) {
|
||||
ASSERT(XFS_FORCED_SHUTDOWN(mp));
|
||||
if (error == ENOSPC) {
|
||||
xfs_warn_ratelimited(mp,
|
||||
"Failed to remove inode(s) from unlinked list. "
|
||||
"Please free space, unmount and run xfs_repair.");
|
||||
} else {
|
||||
ASSERT(XFS_FORCED_SHUTDOWN(mp));
|
||||
}
|
||||
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
|
||||
return error;
|
||||
}
|
||||
|
@ -270,7 +270,8 @@ xfs_bulkstat(
|
||||
/*
|
||||
* Allocate and initialize a btree cursor for ialloc btree.
|
||||
*/
|
||||
cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
|
||||
cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
|
||||
XFS_BTNUM_INO);
|
||||
irbp = irbuf;
|
||||
irbufend = irbuf + nirbuf;
|
||||
end_of_ag = 0;
|
||||
@ -621,7 +622,8 @@ xfs_inumbers(
|
||||
agino = 0;
|
||||
continue;
|
||||
}
|
||||
cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
|
||||
cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
|
||||
XFS_BTNUM_INO);
|
||||
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
|
||||
&tmp);
|
||||
if (error) {
|
||||
|
@ -2138,7 +2138,9 @@ xlog_recover_validate_buf_type(
|
||||
bp->b_ops = &xfs_allocbt_buf_ops;
|
||||
break;
|
||||
case XFS_IBT_CRC_MAGIC:
|
||||
case XFS_FIBT_CRC_MAGIC:
|
||||
case XFS_IBT_MAGIC:
|
||||
case XFS_FIBT_MAGIC:
|
||||
bp->b_ops = &xfs_inobt_buf_ops;
|
||||
break;
|
||||
case XFS_BMAP_CRC_MAGIC:
|
||||
|
@ -587,7 +587,9 @@ xfs_sb_has_compat_feature(
|
||||
return (sbp->sb_features_compat & feature) != 0;
|
||||
}
|
||||
|
||||
#define XFS_SB_FEAT_RO_COMPAT_ALL 0
|
||||
#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
|
||||
#define XFS_SB_FEAT_RO_COMPAT_ALL \
|
||||
(XFS_SB_FEAT_RO_COMPAT_FINOBT)
|
||||
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
|
||||
static inline bool
|
||||
xfs_sb_has_ro_compat_feature(
|
||||
@ -641,6 +643,12 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
|
||||
(sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
|
||||
}
|
||||
|
||||
/*
 * Return non-zero when the superblock is version 5 and has the free inode
 * btree read-only-compatible feature bit set, zero otherwise.
 */
static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
{
	if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5)
		return 0;

	return (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT) != 0;
}
|
||||
|
||||
/*
|
||||
* end of superblock version macros
|
||||
*/
|
||||
|
@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
|
||||
{ "abtc2", XFSSTAT_END_ABTC_V2 },
|
||||
{ "bmbt2", XFSSTAT_END_BMBT_V2 },
|
||||
{ "ibt2", XFSSTAT_END_IBT_V2 },
|
||||
{ "fibt2", XFSSTAT_END_FIBT_V2 },
|
||||
/* we print both series of quota information together */
|
||||
{ "qm", XFSSTAT_END_QM },
|
||||
};
|
||||
|
@ -183,7 +183,23 @@ struct xfsstats {
|
||||
__uint32_t xs_ibt_2_alloc;
|
||||
__uint32_t xs_ibt_2_free;
|
||||
__uint32_t xs_ibt_2_moves;
|
||||
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6)
|
||||
#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15)
|
||||
__uint32_t xs_fibt_2_lookup;
|
||||
__uint32_t xs_fibt_2_compare;
|
||||
__uint32_t xs_fibt_2_insrec;
|
||||
__uint32_t xs_fibt_2_delrec;
|
||||
__uint32_t xs_fibt_2_newroot;
|
||||
__uint32_t xs_fibt_2_killroot;
|
||||
__uint32_t xs_fibt_2_increment;
|
||||
__uint32_t xs_fibt_2_decrement;
|
||||
__uint32_t xs_fibt_2_lshift;
|
||||
__uint32_t xs_fibt_2_rshift;
|
||||
__uint32_t xs_fibt_2_split;
|
||||
__uint32_t xs_fibt_2_join;
|
||||
__uint32_t xs_fibt_2_alloc;
|
||||
__uint32_t xs_fibt_2_free;
|
||||
__uint32_t xs_fibt_2_moves;
|
||||
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6)
|
||||
__uint32_t xs_qm_dqreclaims;
|
||||
__uint32_t xs_qm_dqreclaim_misses;
|
||||
__uint32_t xs_qm_dquot_dups;
|
||||
|
@ -105,6 +105,47 @@ xfs_calc_inode_res(
|
||||
2 * XFS_BMBT_BLOCK_LEN(mp));
|
||||
}
|
||||
|
||||
/*
|
||||
* The free inode btree is a conditional feature and the log reservation
|
||||
* requirements differ slightly from that of the traditional inode allocation
|
||||
* btree. The finobt tracks records for inode chunks with at least one free
|
||||
* inode. A record can be removed from the tree for an inode allocation
|
||||
* or free and thus the finobt reservation is unconditional across:
|
||||
*
|
||||
* - inode allocation
|
||||
* - inode free
|
||||
* - inode chunk allocation
|
||||
*
|
||||
* The 'modify' param indicates to include the record modification scenario. The
|
||||
* 'alloc' param indicates to include the reservation for free space btree
|
||||
* modifications on behalf of finobt modifications. This is required only for
|
||||
* transactions that do not already account for free space btree modifications.
|
||||
*
|
||||
* the free inode btree: max depth * block size
|
||||
* the allocation btrees: 2 trees * (max depth - 1) * block size
|
||||
* the free inode btree entry: block size
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_calc_finobt_res(
|
||||
struct xfs_mount *mp,
|
||||
int alloc,
|
||||
int modify)
|
||||
{
|
||||
uint res;
|
||||
|
||||
if (!xfs_sb_version_hasfinobt(&mp->m_sb))
|
||||
return 0;
|
||||
|
||||
res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
|
||||
if (alloc)
|
||||
res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
|
||||
XFS_FSB_TO_B(mp, 1));
|
||||
if (modify)
|
||||
res += (uint)XFS_FSB_TO_B(mp, 1);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* Various log reservation values.
|
||||
*
|
||||
@ -302,6 +343,7 @@ xfs_calc_remove_reservation(
|
||||
* the superblock for the nlink flag: sector size
|
||||
* the directory btree: (max depth + v2) * dir block size
|
||||
* the directory inode's bmap btree: (max depth + v2) * block size
|
||||
* the finobt (record modification and allocation btrees)
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_calc_create_resv_modify(
|
||||
@ -310,7 +352,8 @@ xfs_calc_create_resv_modify(
|
||||
return xfs_calc_inode_res(mp, 2) +
|
||||
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
|
||||
(uint)XFS_FSB_TO_B(mp, 1) +
|
||||
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
|
||||
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
|
||||
xfs_calc_finobt_res(mp, 1, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -348,6 +391,7 @@ __xfs_calc_create_reservation(
|
||||
* the superblock for the nlink flag: sector size
|
||||
* the inode btree: max depth * blocksize
|
||||
* the allocation btrees: 2 trees * (max depth - 1) * block size
|
||||
* the finobt (record insertion)
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_calc_icreate_resv_alloc(
|
||||
@ -357,7 +401,8 @@ xfs_calc_icreate_resv_alloc(
|
||||
mp->m_sb.sb_sectsize +
|
||||
xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
|
||||
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
|
||||
XFS_FSB_TO_B(mp, 1));
|
||||
XFS_FSB_TO_B(mp, 1)) +
|
||||
xfs_calc_finobt_res(mp, 0, 0);
|
||||
}
|
||||
|
||||
STATIC uint
|
||||
@ -425,6 +470,7 @@ xfs_calc_symlink_reservation(
|
||||
* the on disk inode before ours in the agi hash list: inode cluster size
|
||||
* the inode btree: max depth * blocksize
|
||||
* the allocation btrees: 2 trees * (max depth - 1) * block size
|
||||
* the finobt (record insertion, removal or modification)
|
||||
*/
|
||||
STATIC uint
|
||||
xfs_calc_ifree_reservation(
|
||||
@ -439,7 +485,8 @@ xfs_calc_ifree_reservation(
|
||||
xfs_calc_buf_res(2 + mp->m_ialloc_blks +
|
||||
mp->m_in_maxlevels, 0) +
|
||||
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
|
||||
XFS_FSB_TO_B(mp, 1));
|
||||
XFS_FSB_TO_B(mp, 1)) +
|
||||
xfs_calc_finobt_res(mp, 0, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -47,7 +47,9 @@
|
||||
#define XFS_DIRREMOVE_SPACE_RES(mp) \
|
||||
XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
|
||||
/*
 * m_ialloc_blks plus (m_in_maxlevels - 1) blocks per inode btree being
 * updated: one tree normally, two when the free inode btree feature is
 * enabled.  The tree-count selection must be parenthesized; "?:" binds
 * more loosely than "*", so without the parentheses the finobt case would
 * evaluate to a bare 2 rather than 2 * (m_in_maxlevels - 1).
 */
#define XFS_IALLOC_SPACE_RES(mp)	\
	((mp)->m_ialloc_blks + \
	 ((xfs_sb_version_hasfinobt(&(mp)->m_sb) ? 2 : 1) * \
	  ((mp)->m_in_maxlevels - 1)))
|
||||
|
||||
/*
|
||||
* Space reservation values for various transactions.
|
||||
@ -82,5 +84,8 @@
|
||||
(XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
|
||||
#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
|
||||
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
|
||||
/*
 * Block reservation for freeing an inode: m_in_maxlevels blocks when the
 * free inode btree feature is enabled, none otherwise.  Every use of the
 * argument is parenthesized so any pointer expression may be passed.
 */
#define XFS_IFREE_SPACE_RES(mp)		\
	(xfs_sb_version_hasfinobt(&(mp)->m_sb) ? (mp)->m_in_maxlevels : 0)
|
||||
|
||||
|
||||
#endif /* __XFS_TRANS_SPACE_H__ */
|
||||
|
@ -134,7 +134,7 @@ typedef enum {
|
||||
|
||||
typedef enum {
|
||||
XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
|
||||
XFS_BTNUM_MAX
|
||||
XFS_BTNUM_FINOi, XFS_BTNUM_MAX
|
||||
} xfs_btnum_t;
|
||||
|
||||
struct xfs_name {
|
||||
|
Loading…
Reference in New Issue
Block a user