2018-06-06 02:42:14 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0+
|
2017-03-28 21:56:37 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2017 Oracle. All Rights Reserved.
|
|
|
|
* Author: Darrick J. Wong <darrick.wong@oracle.com>
|
|
|
|
*/
|
|
|
|
#include "xfs.h"
|
|
|
|
#include "xfs_fs.h"
|
|
|
|
#include "xfs_shared.h"
|
|
|
|
#include "xfs_format.h"
|
|
|
|
#include "xfs_log_format.h"
|
|
|
|
#include "xfs_trans_resv.h"
|
|
|
|
#include "xfs_mount.h"
|
|
|
|
#include "xfs_inode.h"
|
|
|
|
#include "xfs_trans.h"
|
|
|
|
#include "xfs_btree.h"
|
|
|
|
#include "xfs_rmap_btree.h"
|
|
|
|
#include "xfs_trace.h"
|
|
|
|
#include "xfs_rmap.h"
|
|
|
|
#include "xfs_alloc.h"
|
|
|
|
#include "xfs_bit.h"
|
|
|
|
#include <linux/fsmap.h>
|
|
|
|
#include "xfs_fsmap.h"
|
|
|
|
#include "xfs_refcount.h"
|
|
|
|
#include "xfs_refcount_btree.h"
|
2017-03-28 21:56:37 +00:00
|
|
|
#include "xfs_alloc_btree.h"
|
2023-10-16 16:21:47 +00:00
|
|
|
#include "xfs_rtbitmap.h"
|
2021-06-02 00:48:24 +00:00
|
|
|
#include "xfs_ag.h"
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
/* Convert an xfs_fsmap to an fsmap. */
|
2020-10-01 17:56:07 +00:00
|
|
|
static void
|
2017-03-28 21:56:37 +00:00
|
|
|
xfs_fsmap_from_internal(
|
|
|
|
struct fsmap *dest,
|
|
|
|
struct xfs_fsmap *src)
|
|
|
|
{
|
|
|
|
dest->fmr_device = src->fmr_device;
|
|
|
|
dest->fmr_flags = src->fmr_flags;
|
|
|
|
dest->fmr_physical = BBTOB(src->fmr_physical);
|
|
|
|
dest->fmr_owner = src->fmr_owner;
|
|
|
|
dest->fmr_offset = BBTOB(src->fmr_offset);
|
|
|
|
dest->fmr_length = BBTOB(src->fmr_length);
|
|
|
|
dest->fmr_reserved[0] = 0;
|
|
|
|
dest->fmr_reserved[1] = 0;
|
|
|
|
dest->fmr_reserved[2] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert an fsmap to an xfs_fsmap. */
|
2024-08-30 22:37:16 +00:00
|
|
|
static void
|
2017-03-28 21:56:37 +00:00
|
|
|
xfs_fsmap_to_internal(
|
|
|
|
struct xfs_fsmap *dest,
|
|
|
|
struct fsmap *src)
|
|
|
|
{
|
|
|
|
dest->fmr_device = src->fmr_device;
|
|
|
|
dest->fmr_flags = src->fmr_flags;
|
|
|
|
dest->fmr_physical = BTOBBT(src->fmr_physical);
|
|
|
|
dest->fmr_owner = src->fmr_owner;
|
|
|
|
dest->fmr_offset = BTOBBT(src->fmr_offset);
|
|
|
|
dest->fmr_length = BTOBBT(src->fmr_length);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert an fsmap owner into an rmapbt owner. */
|
|
|
|
static int
|
|
|
|
xfs_fsmap_owner_to_rmap(
|
|
|
|
struct xfs_rmap_irec *dest,
|
2021-08-11 00:00:31 +00:00
|
|
|
const struct xfs_fsmap *src)
|
2017-03-28 21:56:37 +00:00
|
|
|
{
|
|
|
|
if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
|
|
|
|
dest->rm_owner = src->fmr_owner;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (src->fmr_owner) {
|
|
|
|
case 0: /* "lowest owner id possible" */
|
|
|
|
case -1ULL: /* "highest owner id possible" */
|
xfs: Fix the owner setting issue for rmap query in xfs fsmap
I notice a rmap query bug in xfs_io fsmap:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
Bug:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 0 3' /mnt
[root@fedora ~]#
Normally, we should be able to get one record, but we got nothing.
The root cause of this problem lies in the incorrect setting of rm_owner in
the rmap query. In the case of the initial query where the owner is not
set, __xfs_getfsmap_datadev() first sets info->high.rm_owner to ULLONG_MAX.
This is done to prevent any omissions when comparing rmap items. However,
if the current ag is detected to be the last one, the function sets info's
high_irec based on the provided key. If high->rm_owner is not specified, it
should continue to be set to ULLONG_MAX; otherwise, there will be issues
with interval omissions. For example, consider "start" and "end" within the
same block. If high->rm_owner == 0, it will be smaller than the founded
record in rmapbt, resulting in a query with no records. The main call stack
is as follows:
xfs_ioc_getfsmap
xfs_getfsmap
xfs_getfsmap_datadev_rmapbt
__xfs_getfsmap_datadev
info->high.rm_owner = ULLONG_MAX
if (pag->pag_agno == end_ag)
xfs_fsmap_owner_to_rmap
// set info->high.rm_owner = 0 because fmr_owner == -1ULL
dest->rm_owner = 0
// get nothing
xfs_getfsmap_datadev_rmapbt_query
The problem can be resolved by simply modify the xfs_fsmap_owner_to_rmap
function internal logic to achieve.
After applying this patch, the above problem have been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 0 3' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:04 +00:00
|
|
|
dest->rm_owner = src->fmr_owner;
|
2017-03-28 21:56:37 +00:00
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_FREE:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_NULL;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_UNKNOWN:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_FS:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_FS;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_LOG:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_LOG;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_AG:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_AG;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_INOBT:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_INOBT;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_INODES:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_INODES;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_REFC:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_REFC;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_COW:
|
|
|
|
dest->rm_owner = XFS_RMAP_OWN_COW;
|
|
|
|
break;
|
|
|
|
case XFS_FMR_OWN_DEFECTIVE: /* not implemented */
|
|
|
|
/* fall through */
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert an rmapbt owner into an fsmap owner. */
|
|
|
|
static int
|
2024-11-04 04:19:03 +00:00
|
|
|
xfs_fsmap_owner_from_frec(
|
2021-08-11 00:02:16 +00:00
|
|
|
struct xfs_fsmap *dest,
|
2024-11-04 04:19:03 +00:00
|
|
|
const struct xfs_fsmap_irec *frec)
|
2017-03-28 21:56:37 +00:00
|
|
|
{
|
|
|
|
dest->fmr_flags = 0;
|
2024-11-04 04:19:03 +00:00
|
|
|
if (!XFS_RMAP_NON_INODE_OWNER(frec->owner)) {
|
|
|
|
dest->fmr_owner = frec->owner;
|
2017-03-28 21:56:37 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
|
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
switch (frec->owner) {
|
2017-03-28 21:56:37 +00:00
|
|
|
case XFS_RMAP_OWN_FS:
|
|
|
|
dest->fmr_owner = XFS_FMR_OWN_FS;
|
|
|
|
break;
|
|
|
|
case XFS_RMAP_OWN_LOG:
|
|
|
|
dest->fmr_owner = XFS_FMR_OWN_LOG;
|
|
|
|
break;
|
|
|
|
case XFS_RMAP_OWN_AG:
|
|
|
|
dest->fmr_owner = XFS_FMR_OWN_AG;
|
|
|
|
break;
|
|
|
|
case XFS_RMAP_OWN_INOBT:
|
|
|
|
dest->fmr_owner = XFS_FMR_OWN_INOBT;
|
|
|
|
break;
|
|
|
|
case XFS_RMAP_OWN_INODES:
|
|
|
|
dest->fmr_owner = XFS_FMR_OWN_INODES;
|
|
|
|
break;
|
|
|
|
case XFS_RMAP_OWN_REFC:
|
|
|
|
dest->fmr_owner = XFS_FMR_OWN_REFC;
|
|
|
|
break;
|
|
|
|
case XFS_RMAP_OWN_COW:
|
|
|
|
dest->fmr_owner = XFS_FMR_OWN_COW;
|
|
|
|
break;
|
2017-03-28 21:56:37 +00:00
|
|
|
case XFS_RMAP_OWN_NULL: /* "free" */
|
|
|
|
dest->fmr_owner = XFS_FMR_OWN_FREE;
|
|
|
|
break;
|
2017-03-28 21:56:37 +00:00
|
|
|
default:
|
2019-11-02 16:41:18 +00:00
|
|
|
ASSERT(0);
|
2017-03-28 21:56:37 +00:00
|
|
|
return -EFSCORRUPTED;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* getfsmap query state */
|
|
|
|
struct xfs_getfsmap_info {
|
|
|
|
struct xfs_fsmap_head *head;
|
2020-10-01 17:56:07 +00:00
|
|
|
struct fsmap *fsmap_recs; /* mapping records */
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
|
2024-11-04 04:18:45 +00:00
|
|
|
struct xfs_group *group; /* group info, if applicable */
|
2017-03-28 21:56:37 +00:00
|
|
|
xfs_daddr_t next_daddr; /* next daddr we expect */
|
2023-06-30 00:39:43 +00:00
|
|
|
/* daddr of low fsmap key when we're using the rtbitmap */
|
|
|
|
xfs_daddr_t low_daddr;
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 12 and 107 >> (mp)->m_blkbb_log = 12, so the two
are reduced to 0 and the gap is ignored:
before calculate ----------------> after shifting
104(st) 107(ed) 12(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem have been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
xfs_daddr_t end_daddr; /* daddr of high fsmap key */
|
2017-03-28 21:56:37 +00:00
|
|
|
u64 missing_owner; /* owner of holes */
|
|
|
|
u32 dev; /* device id */
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
/*
|
|
|
|
* Low rmap key for the query. If low.rm_blockcount is nonzero, this
|
|
|
|
* is the second (or later) call to retrieve the recordset in pieces.
|
|
|
|
* xfs_getfsmap_rec_before_start will compare all records retrieved
|
|
|
|
* by the rmapbt query to filter out any records that start before
|
|
|
|
* the last record.
|
|
|
|
*/
|
|
|
|
struct xfs_rmap_irec low;
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_rmap_irec high; /* high rmap key */
|
|
|
|
bool last; /* last extent? */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Associate a device with a getfsmap handler. */
|
|
|
|
struct xfs_getfsmap_dev {
|
|
|
|
u32 dev;
|
|
|
|
int (*fn)(struct xfs_trans *tp,
|
2021-08-11 00:00:31 +00:00
|
|
|
const struct xfs_fsmap *keys,
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_getfsmap_info *info);
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 12 and 107 >> (mp)->m_blkbb_log = 12, so the two
are reduced to 0 and the gap is ignored:
before calculate ----------------> after shifting
104(st) 107(ed) 12(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem have been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
sector_t nr_sectors;
|
2017-03-28 21:56:37 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Compare two getfsmap device handlers. */
|
|
|
|
static int
|
|
|
|
xfs_getfsmap_dev_compare(
|
|
|
|
const void *p1,
|
|
|
|
const void *p2)
|
|
|
|
{
|
|
|
|
const struct xfs_getfsmap_dev *d1 = p1;
|
|
|
|
const struct xfs_getfsmap_dev *d2 = p2;
|
|
|
|
|
|
|
|
return d1->dev - d2->dev;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Decide if this mapping is shared. */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_is_shared(
|
|
|
|
struct xfs_trans *tp,
|
|
|
|
struct xfs_getfsmap_info *info,
|
2024-11-04 04:19:03 +00:00
|
|
|
const struct xfs_fsmap_irec *frec,
|
2017-03-28 21:56:37 +00:00
|
|
|
bool *stat)
|
|
|
|
{
|
|
|
|
struct xfs_mount *mp = tp->t_mountp;
|
|
|
|
struct xfs_btree_cur *cur;
|
|
|
|
xfs_agblock_t fbno;
|
|
|
|
xfs_extlen_t flen;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
*stat = false;
|
2021-08-19 01:46:37 +00:00
|
|
|
if (!xfs_has_reflink(mp))
|
2017-03-28 21:56:37 +00:00
|
|
|
return 0;
|
2021-06-02 00:48:24 +00:00
|
|
|
/* rt files will have no perag structure */
|
2024-11-04 04:18:45 +00:00
|
|
|
if (!info->group)
|
2017-03-28 21:56:37 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Are there any shared blocks here? */
|
|
|
|
flen = 0;
|
2024-11-04 04:18:45 +00:00
|
|
|
cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
|
|
|
|
to_perag(info->group));
|
2017-03-28 21:56:37 +00:00
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
error = xfs_refcount_find_shared(cur, frec->rec_key,
|
|
|
|
XFS_BB_TO_FSBT(mp, frec->len_daddr), &fbno, &flen,
|
|
|
|
false);
|
2017-03-28 21:56:37 +00:00
|
|
|
|
2018-07-19 19:26:31 +00:00
|
|
|
xfs_btree_del_cursor(cur, error);
|
2017-03-28 21:56:37 +00:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
*stat = flen > 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-10-01 17:56:07 +00:00
|
|
|
static inline void
|
|
|
|
xfs_getfsmap_format(
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
struct xfs_fsmap *xfm,
|
|
|
|
struct xfs_getfsmap_info *info)
|
|
|
|
{
|
|
|
|
struct fsmap *rec;
|
|
|
|
|
|
|
|
trace_xfs_getfsmap_mapping(mp, xfm);
|
|
|
|
|
|
|
|
rec = &info->fsmap_recs[info->head->fmh_entries++];
|
|
|
|
xfs_fsmap_from_internal(rec, xfm);
|
|
|
|
}
|
|
|
|
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
static inline bool
|
2024-11-04 04:19:03 +00:00
|
|
|
xfs_getfsmap_frec_before_start(
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
struct xfs_getfsmap_info *info,
|
2024-11-04 04:19:03 +00:00
|
|
|
const struct xfs_fsmap_irec *frec)
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
{
|
2024-08-23 00:00:20 +00:00
|
|
|
if (info->low_daddr != XFS_BUF_DADDR_NULL)
|
2024-11-04 04:19:03 +00:00
|
|
|
return frec->start_daddr < info->low_daddr;
|
|
|
|
if (info->low.rm_blockcount) {
|
|
|
|
struct xfs_rmap_irec rec = {
|
|
|
|
.rm_startblock = frec->rec_key,
|
|
|
|
.rm_owner = frec->owner,
|
|
|
|
.rm_flags = frec->rm_flags,
|
|
|
|
};
|
|
|
|
|
|
|
|
return xfs_rmap_compare(&rec, &info->low) < 0;
|
|
|
|
}
|
|
|
|
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/*
|
|
|
|
* Format a reverse mapping for getfsmap, having translated rm_startblock
|
2023-06-30 00:39:43 +00:00
|
|
|
* into the appropriate daddr units. Pass in a nonzero @len_daddr if the
|
|
|
|
* length could be larger than rm_blockcount in struct xfs_rmap_irec.
|
2017-03-28 21:56:37 +00:00
|
|
|
*/
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_helper(
|
|
|
|
struct xfs_trans *tp,
|
|
|
|
struct xfs_getfsmap_info *info,
|
2024-11-04 04:19:03 +00:00
|
|
|
const struct xfs_fsmap_irec *frec)
|
2017-03-28 21:56:37 +00:00
|
|
|
{
|
|
|
|
struct xfs_fsmap fmr;
|
|
|
|
struct xfs_mount *mp = tp->t_mountp;
|
|
|
|
bool shared;
|
2024-11-04 04:19:03 +00:00
|
|
|
int error = 0;
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
if (fatal_signal_pending(current))
|
|
|
|
return -EINTR;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Filter out records that start before our startpoint, if the
|
|
|
|
* caller requested that.
|
|
|
|
*/
|
2024-11-04 04:19:03 +00:00
|
|
|
if (xfs_getfsmap_frec_before_start(info, frec))
|
|
|
|
goto out;
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 12 and 107 >> (mp)->m_blkbb_log = 12, so the two
are reduced to 0 and the gap is ignored:
before calculate ----------------> after shifting
104(st) 107(ed) 12(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem have been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/* Are we just counting mappings? */
|
|
|
|
if (info->head->fmh_count == 0) {
|
2020-10-01 17:56:07 +00:00
|
|
|
if (info->head->fmh_entries == UINT_MAX)
|
|
|
|
return -ECANCELED;
|
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
if (frec->start_daddr > info->next_daddr)
|
2017-03-28 21:56:37 +00:00
|
|
|
info->head->fmh_entries++;
|
|
|
|
|
|
|
|
if (info->last)
|
2019-08-28 21:39:46 +00:00
|
|
|
return 0;
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
info->head->fmh_entries++;
|
2024-11-04 04:19:03 +00:00
|
|
|
goto out;
|
2017-03-28 21:56:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the record starts past the last physical block we saw,
|
|
|
|
* then we've found a gap. Report the gap as being owned by
|
|
|
|
* whatever the caller specified is the missing owner.
|
|
|
|
*/
|
2024-11-04 04:19:03 +00:00
|
|
|
if (frec->start_daddr > info->next_daddr) {
|
2017-03-28 21:56:37 +00:00
|
|
|
if (info->head->fmh_entries >= info->head->fmh_count)
|
2019-08-28 21:37:57 +00:00
|
|
|
return -ECANCELED;
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
fmr.fmr_device = info->dev;
|
|
|
|
fmr.fmr_physical = info->next_daddr;
|
|
|
|
fmr.fmr_owner = info->missing_owner;
|
|
|
|
fmr.fmr_offset = 0;
|
2024-11-04 04:19:03 +00:00
|
|
|
fmr.fmr_length = frec->start_daddr - info->next_daddr;
|
2017-03-28 21:56:37 +00:00
|
|
|
fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
|
2020-10-01 17:56:07 +00:00
|
|
|
xfs_getfsmap_format(mp, &fmr, info);
|
2017-03-28 21:56:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (info->last)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* Fill out the extent we found */
|
|
|
|
if (info->head->fmh_entries >= info->head->fmh_count)
|
2019-08-28 21:37:57 +00:00
|
|
|
return -ECANCELED;
|
2017-03-28 21:56:37 +00:00
|
|
|
|
2021-06-02 00:48:24 +00:00
|
|
|
trace_xfs_fsmap_mapping(mp, info->dev,
|
2024-11-04 04:18:45 +00:00
|
|
|
info->group ? info->group->xg_gno : NULLAGNUMBER,
|
2024-11-04 04:19:03 +00:00
|
|
|
frec);
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
fmr.fmr_device = info->dev;
|
2024-11-04 04:19:03 +00:00
|
|
|
fmr.fmr_physical = frec->start_daddr;
|
|
|
|
error = xfs_fsmap_owner_from_frec(&fmr, frec);
|
2017-03-28 21:56:37 +00:00
|
|
|
if (error)
|
|
|
|
return error;
|
2024-11-04 04:19:03 +00:00
|
|
|
fmr.fmr_offset = XFS_FSB_TO_BB(mp, frec->offset);
|
|
|
|
fmr.fmr_length = frec->len_daddr;
|
|
|
|
if (frec->rm_flags & XFS_RMAP_UNWRITTEN)
|
2017-03-28 21:56:37 +00:00
|
|
|
fmr.fmr_flags |= FMR_OF_PREALLOC;
|
2024-11-04 04:19:03 +00:00
|
|
|
if (frec->rm_flags & XFS_RMAP_ATTR_FORK)
|
2017-03-28 21:56:37 +00:00
|
|
|
fmr.fmr_flags |= FMR_OF_ATTR_FORK;
|
2024-11-04 04:19:03 +00:00
|
|
|
if (frec->rm_flags & XFS_RMAP_BMBT_BLOCK)
|
2017-03-28 21:56:37 +00:00
|
|
|
fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
|
|
|
|
if (fmr.fmr_flags == 0) {
|
2024-11-04 04:19:03 +00:00
|
|
|
error = xfs_getfsmap_is_shared(tp, info, frec, &shared);
|
2017-03-28 21:56:37 +00:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
if (shared)
|
|
|
|
fmr.fmr_flags |= FMR_OF_SHARED;
|
|
|
|
}
|
|
|
|
|
2020-10-01 17:56:07 +00:00
|
|
|
xfs_getfsmap_format(mp, &fmr, info);
|
2017-03-28 21:56:37 +00:00
|
|
|
out:
|
2024-11-04 04:19:03 +00:00
|
|
|
info->next_daddr = max(info->next_daddr,
|
|
|
|
frec->start_daddr + frec->len_daddr);
|
2019-08-28 21:39:46 +00:00
|
|
|
return 0;
|
2017-03-28 21:56:37 +00:00
|
|
|
}
|
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
static inline int
|
|
|
|
xfs_getfsmap_group_helper(
|
|
|
|
struct xfs_getfsmap_info *info,
|
|
|
|
struct xfs_trans *tp,
|
|
|
|
struct xfs_group *xg,
|
|
|
|
xfs_agblock_t startblock,
|
|
|
|
xfs_extlen_t blockcount,
|
|
|
|
struct xfs_fsmap_irec *frec)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* For an info->last query, we're looking for a gap between the last
|
|
|
|
* mapping emitted and the high key specified by userspace. If the
|
|
|
|
* user's query spans less than 1 fsblock, then info->high and
|
|
|
|
* info->low will have the same rm_startblock, which causes rec_daddr
|
|
|
|
* and next_daddr to be the same. Therefore, use the end_daddr that
|
|
|
|
* we calculated from userspace's high key to synthesize the record.
|
|
|
|
* Note that if the btree query found a mapping, there won't be a gap.
|
|
|
|
*/
|
|
|
|
if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
|
|
|
|
frec->start_daddr = info->end_daddr;
|
|
|
|
else
|
|
|
|
frec->start_daddr = xfs_gbno_to_daddr(xg, startblock);
|
|
|
|
|
|
|
|
frec->len_daddr = XFS_FSB_TO_BB(xg->xg_mount, blockcount);
|
|
|
|
return xfs_getfsmap_helper(tp, info, frec);
|
|
|
|
}
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/* Transform a rmapbt irec into a fsmap */
|
|
|
|
STATIC int
|
2024-11-04 04:19:03 +00:00
|
|
|
xfs_getfsmap_rmapbt_helper(
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_btree_cur *cur,
|
2021-08-11 00:02:16 +00:00
|
|
|
const struct xfs_rmap_irec *rec,
|
2017-03-28 21:56:37 +00:00
|
|
|
void *priv)
|
|
|
|
{
|
2024-11-04 04:19:03 +00:00
|
|
|
struct xfs_fsmap_irec frec = {
|
|
|
|
.owner = rec->rm_owner,
|
|
|
|
.offset = rec->rm_offset,
|
|
|
|
.rm_flags = rec->rm_flags,
|
|
|
|
.rec_key = rec->rm_startblock,
|
|
|
|
};
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_getfsmap_info *info = priv;
|
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
|
|
|
|
rec->rm_startblock, rec->rm_blockcount, &frec);
|
2017-03-28 21:56:37 +00:00
|
|
|
}
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/* Transform a bnobt irec into a fsmap */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_datadev_bnobt_helper(
|
|
|
|
struct xfs_btree_cur *cur,
|
2021-08-11 00:02:16 +00:00
|
|
|
const struct xfs_alloc_rec_incore *rec,
|
2017-03-28 21:56:37 +00:00
|
|
|
void *priv)
|
|
|
|
{
|
2024-11-04 04:19:03 +00:00
|
|
|
struct xfs_fsmap_irec frec = {
|
|
|
|
.owner = XFS_RMAP_OWN_NULL, /* "free" */
|
|
|
|
.rec_key = rec->ar_startblock,
|
|
|
|
};
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_getfsmap_info *info = priv;
|
2024-11-04 04:19:03 +00:00
|
|
|
|
|
|
|
return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
|
|
|
|
rec->ar_startblock, rec->ar_blockcount, &frec);
|
2017-03-28 21:56:37 +00:00
|
|
|
}
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/* Set rmap flags based on the getfsmap flags */
|
|
|
|
static void
|
|
|
|
xfs_getfsmap_set_irec_flags(
|
|
|
|
struct xfs_rmap_irec *irec,
|
2021-08-11 00:00:31 +00:00
|
|
|
const struct xfs_fsmap *fmr)
|
2017-03-28 21:56:37 +00:00
|
|
|
{
|
|
|
|
irec->rm_flags = 0;
|
|
|
|
if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
|
|
|
|
irec->rm_flags |= XFS_RMAP_ATTR_FORK;
|
|
|
|
if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
|
|
|
|
irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
|
|
|
|
if (fmr->fmr_flags & FMR_OF_PREALLOC)
|
|
|
|
irec->rm_flags |= XFS_RMAP_UNWRITTEN;
|
|
|
|
}
|
|
|
|
|
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
Dave Chinner reported that xfs/273 fails if the AG size happens to be an
exact power of two. I traced this to an agbno integer overflow when the
current GETFSMAP call is a continuation of a previous GETFSMAP call, and
the last record returned was non-shareable space at the end of an AG.
__xfs_getfsmap_datadev sets up a data device query by converting the
incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno
and agbno pair. In the (failing) case where fmr_blockcount of the
low key is nonzero and the record was for a non-shareable extent, it
will add fmr_blockcount to start_fsb and info->low.rm_startblock.
If the low key was actually the last record for that AG, then this
addition causes info->low.rm_startblock to point beyond EOAG. When the
rmapbt range query starts, it'll return an empty set, and fsmap moves on
to the next AG.
Or so I thought. Remember how we added to start_fsb?
If agsize < 1<<agblklog, start_fsb points to the same AG as the original
fmr_physical from the low key. We run the rmapbt query, which returns
nothing, so getfsmap zeroes info->low and moves on to the next AG.
If agsize == 1<<agblklog, start_fsb now points to the next AG. We run
the rmapbt query on the next AG with the excessively large
rm_startblock. If this next AG is actually the last AG, we'll set
info->high to EOFS (which now has a lower rm_startblock than
info->low), and the ranged btree query code will return -EINVAL. If
it's not the last AG, we ignore all records for the intermediate AGs.
Oops.
Fix this by decoding start_fsb into agno and agbno only after making
adjustments to start_fsb. This means that info->low.rm_startblock will
always be set to a valid agbno, and we always start the rmapbt iteration
in the correct AG.
While we're at it, fix the predicate for determining if an fsmap record
represents non-shareable space to include file data on pre-reflink
filesystems.
Reported-by: Dave Chinner <david@fromorbit.com>
Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-09-11 15:39:02 +00:00
|
|
|
static inline bool
|
|
|
|
rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r)
|
|
|
|
{
|
|
|
|
if (!xfs_has_reflink(mp))
|
|
|
|
return true;
|
|
|
|
if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner))
|
|
|
|
return true;
|
|
|
|
if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
|
|
|
|
XFS_RMAP_UNWRITTEN))
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/* Execute a getfsmap query against the regular data device. */
|
|
|
|
STATIC int
|
|
|
|
__xfs_getfsmap_datadev(
|
|
|
|
struct xfs_trans *tp,
|
2021-08-11 00:00:31 +00:00
|
|
|
const struct xfs_fsmap *keys,
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_getfsmap_info *info,
|
|
|
|
int (*query_fn)(struct xfs_trans *,
|
|
|
|
struct xfs_getfsmap_info *,
|
|
|
|
struct xfs_btree_cur **,
|
|
|
|
void *),
|
|
|
|
void *priv)
|
|
|
|
{
|
|
|
|
struct xfs_mount *mp = tp->t_mountp;
|
2024-11-04 04:18:39 +00:00
|
|
|
struct xfs_perag *pag = NULL;
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_btree_cur *bt_cur = NULL;
|
|
|
|
xfs_fsblock_t start_fsb;
|
|
|
|
xfs_fsblock_t end_fsb;
|
2024-11-04 04:18:39 +00:00
|
|
|
xfs_agnumber_t start_ag, end_ag;
|
2021-08-11 00:00:31 +00:00
|
|
|
uint64_t eofs;
|
2017-03-28 21:56:37 +00:00
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
|
|
|
|
if (keys[0].fmr_physical >= eofs)
|
|
|
|
return 0;
|
|
|
|
start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
|
2021-08-11 00:00:31 +00:00
|
|
|
end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert the fsmap low/high keys to AG based keys. Initialize
|
|
|
|
* low to the fsmap low key and max out the high key to the end
|
|
|
|
* of the AG.
|
|
|
|
*/
|
|
|
|
info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
|
|
|
|
error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
|
|
|
|
if (error)
|
|
|
|
return error;
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
|
2017-03-28 21:56:37 +00:00
|
|
|
xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
|
|
|
|
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
/* Adjust the low key if we are continuing from where we left off. */
|
|
|
|
if (info->low.rm_blockcount == 0) {
|
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
Dave Chinner reported that xfs/273 fails if the AG size happens to be an
exact power of two. I traced this to an agbno integer overflow when the
current GETFSMAP call is a continuation of a previous GETFSMAP call, and
the last record returned was non-shareable space at the end of an AG.
__xfs_getfsmap_datadev sets up a data device query by converting the
incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno
and agbno pair. In the (failing) case where fmr_blockcount of the
low key is nonzero and the record was for a non-shareable extent, it
will add fmr_blockcount to start_fsb and info->low.rm_startblock.
If the low key was actually the last record for that AG, then this
addition causes info->low.rm_startblock to point beyond EOAG. When the
rmapbt range query starts, it'll return an empty set, and fsmap moves on
to the next AG.
Or so I thought. Remember how we added to start_fsb?
If agsize < 1<<agblklog, start_fsb points to the same AG as the original
fmr_physical from the low key. We run the rmapbt query, which returns
nothing, so getfsmap zeroes info->low and moves on to the next AG.
If agsize == 1<<agblklog, start_fsb now points to the next AG. We run
the rmapbt query on the next AG with the excessively large
rm_startblock. If this next AG is actually the last AG, we'll set
info->high to EOFS (which now has a lower rm_startblock than
info->low), and the ranged btree query code will return -EINVAL. If
it's not the last AG, we ignore all records for the intermediate AGs.
Oops.
Fix this by decoding start_fsb into agno and agbno only after making
adjustments to start_fsb. This means that info->low.rm_startblock will
always be set to a valid agbno, and we always start the rmapbt iteration
in the correct AG.
While we're at it, fix the predicate for determining if an fsmap record
represents non-shareable space to include file data on pre-reflink
filesystems.
Reported-by: Dave Chinner <david@fromorbit.com>
Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-09-11 15:39:02 +00:00
|
|
|
/* No previous record from which to continue */
|
|
|
|
} else if (rmap_not_shareable(mp, &info->low)) {
|
|
|
|
/* Last record seen was an unshareable extent */
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
info->low.rm_owner = 0;
|
|
|
|
info->low.rm_offset = 0;
|
|
|
|
|
|
|
|
start_fsb += info->low.rm_blockcount;
|
|
|
|
if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
|
|
|
|
return 0;
|
|
|
|
} else {
|
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
Dave Chinner reported that xfs/273 fails if the AG size happens to be an
exact power of two. I traced this to an agbno integer overflow when the
current GETFSMAP call is a continuation of a previous GETFSMAP call, and
the last record returned was non-shareable space at the end of an AG.
__xfs_getfsmap_datadev sets up a data device query by converting the
incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno
and agbno pair. In the (failing) case where fmr_blockcount of the
low key is nonzero and the record was for a non-shareable extent, it
will add fmr_blockcount to start_fsb and info->low.rm_startblock.
If the low key was actually the last record for that AG, then this
addition causes info->low.rm_startblock to point beyond EOAG. When the
rmapbt range query starts, it'll return an empty set, and fsmap moves on
to the next AG.
Or so I thought. Remember how we added to start_fsb?
If agsize < 1<<agblklog, start_fsb points to the same AG as the original
fmr_physical from the low key. We run the rmapbt query, which returns
nothing, so getfsmap zeroes info->low and moves on to the next AG.
If agsize == 1<<agblklog, start_fsb now points to the next AG. We run
the rmapbt query on the next AG with the excessively large
rm_startblock. If this next AG is actually the last AG, we'll set
info->high to EOFS (which now has a lower rm_startblock than
info->low), and the ranged btree query code will return -EINVAL. If
it's not the last AG, we ignore all records for the intermediate AGs.
Oops.
Fix this by decoding start_fsb into agno and agbno only after making
adjustments to start_fsb. This means that info->low.rm_startblock will
always be set to a valid agbno, and we always start the rmapbt iteration
in the correct AG.
While we're at it, fix the predicate for determining if an fsmap record
represents non-shareable space to include file data on pre-reflink
filesystems.
Reported-by: Dave Chinner <david@fromorbit.com>
Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-09-11 15:39:02 +00:00
|
|
|
/* Last record seen was a shareable file data extent */
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
info->low.rm_offset += info->low.rm_blockcount;
|
|
|
|
}
|
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
Dave Chinner reported that xfs/273 fails if the AG size happens to be an
exact power of two. I traced this to an agbno integer overflow when the
current GETFSMAP call is a continuation of a previous GETFSMAP call, and
the last record returned was non-shareable space at the end of an AG.
__xfs_getfsmap_datadev sets up a data device query by converting the
incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno
and agbno pair. In the (failing) case where fmr_blockcount of the
low key is nonzero and the record was for a non-shareable extent, it
will add fmr_blockcount to start_fsb and info->low.rm_startblock.
If the low key was actually the last record for that AG, then this
addition causes info->low.rm_startblock to point beyond EOAG. When the
rmapbt range query starts, it'll return an empty set, and fsmap moves on
to the next AG.
Or so I thought. Remember how we added to start_fsb?
If agsize < 1<<agblklog, start_fsb points to the same AG as the original
fmr_physical from the low key. We run the rmapbt query, which returns
nothing, so getfsmap zeroes info->low and moves on to the next AG.
If agsize == 1<<agblklog, start_fsb now points to the next AG. We run
the rmapbt query on the next AG with the excessively large
rm_startblock. If this next AG is actually the last AG, we'll set
info->high to EOFS (which now has a lower rm_startblock than
info->low), and the ranged btree query code will return -EINVAL. If
it's not the last AG, we ignore all records for the intermediate AGs.
Oops.
Fix this by decoding start_fsb into agno and agbno only after making
adjustments to start_fsb. This means that info->low.rm_startblock will
always be set to a valid agbno, and we always start the rmapbt iteration
in the correct AG.
While we're at it, fix the predicate for determining if an fsmap record
represents non-shareable space to include file data on pre-reflink
filesystems.
Reported-by: Dave Chinner <david@fromorbit.com>
Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-09-11 15:39:02 +00:00
|
|
|
info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
info->high.rm_startblock = -1U;
|
|
|
|
info->high.rm_owner = ULLONG_MAX;
|
|
|
|
info->high.rm_offset = ULLONG_MAX;
|
|
|
|
info->high.rm_blockcount = 0;
|
|
|
|
info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
|
|
|
|
|
|
|
|
start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
|
|
|
|
end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
|
|
|
|
|
2024-11-04 04:18:39 +00:00
|
|
|
while ((pag = xfs_perag_next_range(mp, pag, start_ag, end_ag))) {
|
2017-03-28 21:56:37 +00:00
|
|
|
/*
|
|
|
|
* Set the AG high key from the fsmap high key if this
|
|
|
|
* is the last AG that we're querying.
|
|
|
|
*/
|
2024-11-04 04:18:45 +00:00
|
|
|
info->group = pag_group(pag);
|
2024-11-04 04:18:38 +00:00
|
|
|
if (pag_agno(pag) == end_ag) {
|
2017-03-28 21:56:37 +00:00
|
|
|
info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
|
|
|
|
end_fsb);
|
|
|
|
info->high.rm_offset = XFS_BB_TO_FSBT(mp,
|
|
|
|
keys[1].fmr_offset);
|
|
|
|
error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
|
|
|
|
if (error)
|
2021-06-02 00:48:24 +00:00
|
|
|
break;
|
2017-03-28 21:56:37 +00:00
|
|
|
xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bt_cur) {
|
|
|
|
xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
|
|
|
|
bt_cur = NULL;
|
|
|
|
xfs_trans_brelse(tp, info->agf_bp);
|
|
|
|
info->agf_bp = NULL;
|
|
|
|
}
|
|
|
|
|
2022-07-07 09:07:40 +00:00
|
|
|
error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp);
|
2017-03-28 21:56:37 +00:00
|
|
|
if (error)
|
2021-06-02 00:48:24 +00:00
|
|
|
break;
|
2017-03-28 21:56:37 +00:00
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
trace_xfs_fsmap_low_group_key(mp, info->dev, pag_agno(pag),
|
2021-06-02 00:48:24 +00:00
|
|
|
&info->low);
|
2024-11-04 04:19:03 +00:00
|
|
|
trace_xfs_fsmap_high_group_key(mp, info->dev, pag_agno(pag),
|
2017-03-28 21:56:37 +00:00
|
|
|
&info->high);
|
|
|
|
|
|
|
|
error = query_fn(tp, info, &bt_cur, priv);
|
|
|
|
if (error)
|
2021-06-02 00:48:24 +00:00
|
|
|
break;
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the AG low key to the start of the AG prior to
|
|
|
|
* moving on to the next AG.
|
|
|
|
*/
|
2024-11-04 04:18:38 +00:00
|
|
|
if (pag_agno(pag) == start_ag)
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
memset(&info->low, 0, sizeof(info->low));
|
2017-03-28 21:56:37 +00:00
|
|
|
|
2021-06-02 00:48:24 +00:00
|
|
|
/*
|
|
|
|
* If this is the last AG, report any gap at the end of it
|
|
|
|
* before we drop the reference to the perag when the loop
|
|
|
|
* terminates.
|
|
|
|
*/
|
2024-11-04 04:18:38 +00:00
|
|
|
if (pag_agno(pag) == end_ag) {
|
2021-06-02 00:48:24 +00:00
|
|
|
info->last = true;
|
|
|
|
error = query_fn(tp, info, &bt_cur, priv);
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
}
|
2024-11-04 04:18:45 +00:00
|
|
|
info->group = NULL;
|
2021-06-02 00:48:24 +00:00
|
|
|
}
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
if (bt_cur)
|
|
|
|
xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
|
|
|
|
XFS_BTREE_NOERROR);
|
|
|
|
if (info->agf_bp) {
|
|
|
|
xfs_trans_brelse(tp, info->agf_bp);
|
|
|
|
info->agf_bp = NULL;
|
|
|
|
}
|
2024-11-04 04:18:45 +00:00
|
|
|
if (info->group) {
|
|
|
|
xfs_perag_rele(pag);
|
|
|
|
info->group = NULL;
|
2021-06-02 00:48:24 +00:00
|
|
|
} else if (pag) {
|
|
|
|
/* loop termination case */
|
2023-02-12 22:14:42 +00:00
|
|
|
xfs_perag_rele(pag);
|
2021-06-02 00:48:24 +00:00
|
|
|
}
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Actually query the rmap btree. */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_datadev_rmapbt_query(
|
|
|
|
struct xfs_trans *tp,
|
|
|
|
struct xfs_getfsmap_info *info,
|
|
|
|
struct xfs_btree_cur **curpp,
|
|
|
|
void *priv)
|
|
|
|
{
|
|
|
|
/* Report any gap at the end of the last AG. */
|
|
|
|
if (info->last)
|
2024-11-04 04:19:03 +00:00
|
|
|
return xfs_getfsmap_rmapbt_helper(*curpp, &info->high, info);
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
/* Allocate cursor for this AG and query_range it. */
|
|
|
|
*curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
|
2024-11-04 04:18:45 +00:00
|
|
|
to_perag(info->group));
|
2017-03-28 21:56:37 +00:00
|
|
|
return xfs_rmap_query_range(*curpp, &info->low, &info->high,
|
2024-11-04 04:19:03 +00:00
|
|
|
xfs_getfsmap_rmapbt_helper, info);
|
2017-03-28 21:56:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Execute a getfsmap query against the regular data device rmapbt. */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_datadev_rmapbt(
|
|
|
|
struct xfs_trans *tp,
|
2021-08-11 00:00:31 +00:00
|
|
|
const struct xfs_fsmap *keys,
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_getfsmap_info *info)
|
|
|
|
{
|
|
|
|
info->missing_owner = XFS_FMR_OWN_FREE;
|
|
|
|
return __xfs_getfsmap_datadev(tp, keys, info,
|
|
|
|
xfs_getfsmap_datadev_rmapbt_query, NULL);
|
|
|
|
}
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/* Actually query the bno btree. */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_datadev_bnobt_query(
|
|
|
|
struct xfs_trans *tp,
|
|
|
|
struct xfs_getfsmap_info *info,
|
|
|
|
struct xfs_btree_cur **curpp,
|
|
|
|
void *priv)
|
|
|
|
{
|
|
|
|
struct xfs_alloc_rec_incore *key = priv;
|
|
|
|
|
|
|
|
/* Report any gap at the end of the last AG. */
|
|
|
|
if (info->last)
|
|
|
|
return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);
|
|
|
|
|
|
|
|
/* Allocate cursor for this AG and query_range it. */
|
2024-02-22 20:40:12 +00:00
|
|
|
*curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp,
|
2024-11-04 04:18:45 +00:00
|
|
|
to_perag(info->group));
|
2017-03-28 21:56:37 +00:00
|
|
|
key->ar_startblock = info->low.rm_startblock;
|
|
|
|
key[1].ar_startblock = info->high.rm_startblock;
|
|
|
|
return xfs_alloc_query_range(*curpp, key, &key[1],
|
|
|
|
xfs_getfsmap_datadev_bnobt_helper, info);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Execute a getfsmap query against the regular data device's bnobt. */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_datadev_bnobt(
|
|
|
|
struct xfs_trans *tp,
|
2021-08-11 00:00:31 +00:00
|
|
|
const struct xfs_fsmap *keys,
|
2017-03-28 21:56:37 +00:00
|
|
|
struct xfs_getfsmap_info *info)
|
|
|
|
{
|
|
|
|
struct xfs_alloc_rec_incore akeys[2];
|
|
|
|
|
2023-02-15 01:51:35 +00:00
|
|
|
memset(akeys, 0, sizeof(akeys));
|
2017-03-28 21:56:37 +00:00
|
|
|
info->missing_owner = XFS_FMR_OWN_UNKNOWN;
|
|
|
|
return __xfs_getfsmap_datadev(tp, keys, info,
|
|
|
|
xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
|
|
|
|
}
|
|
|
|
|
2024-08-30 22:37:15 +00:00
|
|
|
/* Execute a getfsmap query against the log device. */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_logdev(
|
|
|
|
struct xfs_trans *tp,
|
|
|
|
const struct xfs_fsmap *keys,
|
|
|
|
struct xfs_getfsmap_info *info)
|
|
|
|
{
|
2024-11-04 04:19:03 +00:00
|
|
|
struct xfs_fsmap_irec frec = {
|
|
|
|
.start_daddr = 0,
|
|
|
|
.rec_key = 0,
|
|
|
|
.owner = XFS_RMAP_OWN_LOG,
|
|
|
|
};
|
2024-08-30 22:37:15 +00:00
|
|
|
struct xfs_mount *mp = tp->t_mountp;
|
|
|
|
xfs_fsblock_t start_fsb, end_fsb;
|
|
|
|
uint64_t eofs;
|
|
|
|
|
|
|
|
eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
|
|
|
|
if (keys[0].fmr_physical >= eofs)
|
|
|
|
return 0;
|
|
|
|
start_fsb = XFS_BB_TO_FSBT(mp,
|
|
|
|
keys[0].fmr_physical + keys[0].fmr_length);
|
|
|
|
end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
|
|
|
|
|
|
|
|
/* Adjust the low key if we are continuing from where we left off. */
|
|
|
|
if (keys[0].fmr_length > 0)
|
|
|
|
info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
|
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
trace_xfs_fsmap_low_linear_key(mp, info->dev, start_fsb);
|
|
|
|
trace_xfs_fsmap_high_linear_key(mp, info->dev, end_fsb);
|
2024-08-30 22:37:15 +00:00
|
|
|
|
|
|
|
if (start_fsb > 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Fabricate an rmap entry for the external log device. */
|
2024-11-04 04:19:03 +00:00
|
|
|
frec.len_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
|
|
|
|
return xfs_getfsmap_helper(tp, info, &frec);
|
2024-08-30 22:37:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_XFS_RT
|
|
|
|
/* Transform a rtbitmap "record" into a fsmap */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_rtdev_rtbitmap_helper(
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
struct xfs_trans *tp,
|
|
|
|
const struct xfs_rtalloc_rec *rec,
|
|
|
|
void *priv)
|
|
|
|
{
|
2024-11-04 04:19:03 +00:00
|
|
|
struct xfs_fsmap_irec frec = {
|
|
|
|
.owner = XFS_RMAP_OWN_NULL, /* "free" */
|
|
|
|
};
|
2024-08-30 22:37:15 +00:00
|
|
|
struct xfs_getfsmap_info *info = priv;
|
|
|
|
xfs_rtblock_t rtbno;
|
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
/*
|
|
|
|
* For an info->last query, we're looking for a gap between the last
|
|
|
|
* mapping emitted and the high key specified by userspace. If the
|
|
|
|
* user's query spans less than 1 fsblock, then info->high and
|
|
|
|
* info->low will have the same rm_startblock, which causes rec_daddr
|
|
|
|
* and next_daddr to be the same. Therefore, use the end_daddr that
|
|
|
|
* we calculated from userspace's high key to synthesize the record.
|
|
|
|
* Note that if the btree query found a mapping, there won't be a gap.
|
|
|
|
*/
|
|
|
|
if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) {
|
|
|
|
frec.start_daddr = info->end_daddr;
|
|
|
|
} else {
|
|
|
|
rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
|
|
|
|
frec.start_daddr = XFS_FSB_TO_BB(mp, rtbno);
|
|
|
|
}
|
2024-08-30 22:37:15 +00:00
|
|
|
|
|
|
|
rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount);
|
2024-11-04 04:19:03 +00:00
|
|
|
frec.len_daddr = XFS_FSB_TO_BB(mp, rtbno);
|
|
|
|
return xfs_getfsmap_helper(tp, info, &frec);
|
2024-08-30 22:37:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Execute a getfsmap query against the realtime device rtbitmap. */
|
|
|
|
STATIC int
|
|
|
|
xfs_getfsmap_rtdev_rtbitmap(
|
|
|
|
struct xfs_trans *tp,
|
|
|
|
const struct xfs_fsmap *keys,
|
|
|
|
struct xfs_getfsmap_info *info)
|
|
|
|
{
|
|
|
|
|
|
|
|
struct xfs_rtalloc_rec ahigh = { 0 };
|
|
|
|
struct xfs_mount *mp = tp->t_mountp;
|
|
|
|
xfs_rtblock_t start_rtb;
|
|
|
|
xfs_rtblock_t end_rtb;
|
|
|
|
xfs_rtxnum_t high;
|
|
|
|
uint64_t eofs;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents));
|
|
|
|
if (keys[0].fmr_physical >= eofs)
|
|
|
|
return 0;
|
|
|
|
start_rtb = XFS_BB_TO_FSBT(mp,
|
|
|
|
keys[0].fmr_physical + keys[0].fmr_length);
|
|
|
|
end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
|
|
|
|
|
|
|
|
info->missing_owner = XFS_FMR_OWN_UNKNOWN;
|
|
|
|
|
|
|
|
/* Adjust the low key if we are continuing from where we left off. */
|
|
|
|
if (keys[0].fmr_length > 0) {
|
|
|
|
info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
|
|
|
|
if (info->low_daddr >= eofs)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-11-04 04:19:03 +00:00
|
|
|
trace_xfs_fsmap_low_linear_key(mp, info->dev, start_rtb);
|
|
|
|
trace_xfs_fsmap_high_linear_key(mp, info->dev, end_rtb);
|
2024-08-30 22:37:15 +00:00
|
|
|
|
|
|
|
xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up query parameters to return free rtextents covering the range
|
|
|
|
* we want.
|
|
|
|
*/
|
|
|
|
high = xfs_rtb_to_rtxup(mp, end_rtb);
|
|
|
|
error = xfs_rtalloc_query_range(mp, tp, xfs_rtb_to_rtx(mp, start_rtb),
|
|
|
|
high, xfs_getfsmap_rtdev_rtbitmap_helper, info);
|
|
|
|
if (error)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Report any gaps at the end of the rtbitmap by simulating a null
|
|
|
|
* rmap starting at the block after the end of the query range.
|
|
|
|
*/
|
|
|
|
info->last = true;
|
|
|
|
ahigh.ar_startext = min(mp->m_sb.sb_rextents, high);
|
|
|
|
|
|
|
|
error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info);
|
|
|
|
if (error)
|
|
|
|
goto err;
|
|
|
|
err:
|
|
|
|
xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_XFS_RT */
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/* Do we recognize the device? */
|
|
|
|
STATIC bool
|
|
|
|
xfs_getfsmap_is_valid_device(
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
struct xfs_fsmap *fm)
|
|
|
|
{
|
|
|
|
if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
|
|
|
|
fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
|
|
|
|
return true;
|
|
|
|
if (mp->m_logdev_targp &&
|
|
|
|
fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
|
|
|
|
return true;
|
2017-03-28 21:56:38 +00:00
|
|
|
if (mp->m_rtdev_targp &&
|
|
|
|
fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
|
|
|
|
return true;
|
2017-03-28 21:56:37 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ensure that the low key is less than the high key. */
|
|
|
|
STATIC bool
|
|
|
|
xfs_getfsmap_check_keys(
|
|
|
|
struct xfs_fsmap *low_key,
|
|
|
|
struct xfs_fsmap *high_key)
|
|
|
|
{
|
2023-06-30 00:39:45 +00:00
|
|
|
if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
|
|
|
|
if (low_key->fmr_offset)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (high_key->fmr_flags != -1U &&
|
|
|
|
(high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
|
|
|
|
FMR_OF_EXTENT_MAP))) {
|
|
|
|
if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (high_key->fmr_length && high_key->fmr_length != -1ULL)
|
|
|
|
return false;
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
if (low_key->fmr_device > high_key->fmr_device)
|
|
|
|
return false;
|
|
|
|
if (low_key->fmr_device < high_key->fmr_device)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (low_key->fmr_physical > high_key->fmr_physical)
|
|
|
|
return false;
|
|
|
|
if (low_key->fmr_physical < high_key->fmr_physical)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (low_key->fmr_owner > high_key->fmr_owner)
|
|
|
|
return false;
|
|
|
|
if (low_key->fmr_owner < high_key->fmr_owner)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (low_key->fmr_offset > high_key->fmr_offset)
|
|
|
|
return false;
|
|
|
|
if (low_key->fmr_offset < high_key->fmr_offset)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-10-09 18:37:22 +00:00
|
|
|
/*
 * Number of devices GETFSMAP knows about: three when realtime device support
 * is configured at build time, otherwise only two.
 */
#if defined(CONFIG_XFS_RT)
# define XFS_GETFSMAP_DEVS	3
#else
# define XFS_GETFSMAP_DEVS	2
#endif /* CONFIG_XFS_RT */
|
|
|
|
|
2017-03-28 21:56:37 +00:00
|
|
|
/*
|
2020-10-01 17:56:07 +00:00
|
|
|
* Get filesystem's extents as described in head, and format for output. Fills
|
|
|
|
* in the supplied records array until there are no more reverse mappings to
|
|
|
|
* return or head.fmh_entries == head.fmh_count. In the second case, this
|
|
|
|
* function returns -ECANCELED to indicate that more records would have been
|
|
|
|
* returned.
|
2017-03-28 21:56:37 +00:00
|
|
|
*
|
|
|
|
* Key to Confusion
|
|
|
|
* ----------------
|
|
|
|
* There are multiple levels of keys and counters at work here:
|
|
|
|
* xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in;
|
2023-06-30 00:39:45 +00:00
|
|
|
* these reflect fs-wide sector addrs.
|
2017-03-28 21:56:37 +00:00
|
|
|
* dkeys -- fmh_keys used to query each device;
|
2023-06-30 00:39:45 +00:00
|
|
|
* these are fmh_keys but w/ the low key
|
|
|
|
* bumped up by fmr_length.
|
2017-03-28 21:56:37 +00:00
|
|
|
* xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
|
|
|
|
* is how we detect gaps in the fsmap
|
|
|
|
records and report them.
|
|
|
|
* xfs_getfsmap_info.low/high -- per-AG low/high keys computed from
|
2023-06-30 00:39:45 +00:00
|
|
|
* dkeys; used to query the metadata.
|
2017-03-28 21:56:37 +00:00
|
|
|
*/
|
2024-08-30 22:37:16 +00:00
|
|
|
STATIC int
|
2017-03-28 21:56:37 +00:00
|
|
|
xfs_getfsmap(
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
struct xfs_fsmap_head *head,
|
2020-10-01 17:56:07 +00:00
|
|
|
struct fsmap *fsmap_recs)
|
2017-03-28 21:56:37 +00:00
|
|
|
{
|
|
|
|
struct xfs_trans *tp = NULL;
|
|
|
|
struct xfs_fsmap dkeys[2]; /* per-dev keys */
|
|
|
|
struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
|
2017-04-21 18:24:39 +00:00
|
|
|
struct xfs_getfsmap_info info = { NULL };
|
2017-05-12 17:44:10 +00:00
|
|
|
bool use_rmap;
|
2017-03-28 21:56:37 +00:00
|
|
|
int i;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
if (head->fmh_iflags & ~FMH_IF_VALID)
|
|
|
|
return -EINVAL;
|
|
|
|
if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
|
|
|
|
!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
|
|
|
|
return -EINVAL;
|
2023-06-30 00:39:45 +00:00
|
|
|
if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
|
|
|
|
return -EINVAL;
|
2017-03-28 21:56:37 +00:00
|
|
|
|
2022-02-26 00:18:30 +00:00
|
|
|
use_rmap = xfs_has_rmapbt(mp) &&
|
|
|
|
has_capability_noaudit(current, CAP_SYS_ADMIN);
|
2017-03-28 21:56:37 +00:00
|
|
|
head->fmh_entries = 0;
|
|
|
|
|
|
|
|
/* Set up our device handlers. */
|
|
|
|
memset(handlers, 0, sizeof(handlers));
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 12 and 107 >> (mp)->m_blkbb_log = 12, so the two
are reduced to 0 and the gap is ignored:
before calculate ----------------> after shifting
104(st) 107(ed) 12(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem have been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
|
2017-03-28 21:56:37 +00:00
|
|
|
handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
|
2017-05-12 17:44:10 +00:00
|
|
|
if (use_rmap)
|
2017-03-28 21:56:37 +00:00
|
|
|
handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
|
|
|
|
else
|
|
|
|
handlers[0].fn = xfs_getfsmap_datadev_bnobt;
|
2017-03-28 21:56:37 +00:00
|
|
|
if (mp->m_logdev_targp != mp->m_ddev_targp) {
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 12 and 107 >> (mp)->m_blkbb_log = 12, so the two
are reduced to 0 and the gap is ignored:
before calculate ----------------> after shifting
104(st) 107(ed) 12(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem have been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
handlers[1].nr_sectors = XFS_FSB_TO_BB(mp,
|
|
|
|
mp->m_sb.sb_logblocks);
|
2017-03-28 21:56:37 +00:00
|
|
|
handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
|
|
|
|
handlers[1].fn = xfs_getfsmap_logdev;
|
|
|
|
}
|
2017-10-09 18:37:22 +00:00
|
|
|
#ifdef CONFIG_XFS_RT
|
2017-03-28 21:56:38 +00:00
|
|
|
if (mp->m_rtdev_targp) {
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 12 and 107 >> (mp)->m_blkbb_log = 12, so the two
are reduced to 0 and the gap is ignored:
before calculate ----------------> after shifting
104(st) 107(ed) 12(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem have been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
|
2017-03-28 21:56:38 +00:00
|
|
|
handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
|
|
|
|
handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
|
|
|
|
}
|
2017-10-09 18:37:22 +00:00
|
|
|
#endif /* CONFIG_XFS_RT */
|
2017-03-28 21:56:37 +00:00
|
|
|
|
|
|
|
xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
|
|
|
|
xfs_getfsmap_dev_compare);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* To continue where we left off, we allow userspace to use the
|
|
|
|
* last mapping from a previous call as the low key of the next.
|
|
|
|
* This is identified by a non-zero length in the low key. We
|
|
|
|
* have to increment the low key in this scenario to ensure we
|
|
|
|
* don't return the same mapping again, and instead return the
|
|
|
|
* very next mapping.
|
|
|
|
*
|
|
|
|
* If the low key mapping refers to file data, the same physical
|
|
|
|
* blocks could be mapped to several other files/offsets.
|
|
|
|
* According to rmapbt record ordering, the minimal next
|
|
|
|
* possible record for the block range is the next starting
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
* offset in the same inode. Therefore, each fsmap backend bumps
|
|
|
|
* the file offset to continue the search appropriately. For
|
|
|
|
* all other low key mapping types (attr blocks, metadata), each
|
|
|
|
* fsmap backend bumps the physical offset as there can be no
|
|
|
|
* other mapping for the same physical block range.
|
2017-03-28 21:56:37 +00:00
|
|
|
*/
|
|
|
|
dkeys[0] = head->fmh_keys[0];
|
|
|
|
memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
|
|
|
|
|
|
|
|
info.next_daddr = head->fmh_keys[0].fmr_physical +
|
|
|
|
head->fmh_keys[0].fmr_length;
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 13 and 107 >> (mp)->m_blkbb_log = 13, so the
difference between the two is reduced to 0 and the gap is ignored:
before shifting ----------------> after shifting
104(st) 107(ed) 13(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem has been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
info.end_daddr = XFS_BUF_DADDR_NULL;
|
2020-10-01 17:56:07 +00:00
|
|
|
info.fsmap_recs = fsmap_recs;
|
2017-03-28 21:56:37 +00:00
|
|
|
info.head = head;
|
|
|
|
|
|
|
|
/* For each device we support... */
|
|
|
|
for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
|
|
|
|
/* Is this device within the range the user asked for? */
|
|
|
|
if (!handlers[i].fn)
|
|
|
|
continue;
|
|
|
|
if (head->fmh_keys[0].fmr_device > handlers[i].dev)
|
|
|
|
continue;
|
|
|
|
if (head->fmh_keys[1].fmr_device < handlers[i].dev)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this device number matches the high key, we have
|
|
|
|
* to pass the high key to the handler to limit the
|
|
|
|
* query results. If the device number exceeds the
|
|
|
|
* low key, zero out the low key so that we get
|
|
|
|
* everything from the beginning.
|
|
|
|
*/
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 13 and 107 >> (mp)->m_blkbb_log = 13, so the
difference between the two is reduced to 0 and the gap is ignored:
before shifting ----------------> after shifting
104(st) 107(ed) 13(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem has been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
|
2017-03-28 21:56:37 +00:00
|
|
|
dkeys[1] = head->fmh_keys[1];
|
xfs: Fix missing interval for missing_owner in xfs fsmap
In the fsmap query of xfs, there is an interval missing problem:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [0..7]: static fs metadata 0 (0..7) 8
1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16
2: 253:16 [24..39]: inode btree 0 (24..39) 16
3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8
4: 253:16 [48..55]: refcount btree 0 (48..55) 8
5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48
6: 253:16 [104..127]: free space 0 (104..127) 24
......
BUG:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
[root@fedora ~]#
Normally, we should be able to get [104, 107), but we got nothing.
The problem is caused by shifting. The query for the problem-triggered
scenario is for the missing_owner interval (e.g. freespace in rmapbt/
unknown space in bnobt), which is obtained by subtraction (gap). For this
scenario, the interval is obtained by info->last. However, rec_daddr is
calculated based on the start_block recorded in key[1], which is converted
by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed
info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log
<= info->next_daddr, no records will be displayed. In the above example,
104 >> (mp)->m_blkbb_log = 13 and 107 >> (mp)->m_blkbb_log = 13, so the
difference between the two is reduced to 0 and the gap is ignored:
before shifting ----------------> after shifting
104(st) 107(ed) 13(st/ed)
|---------| |
sector size block size
Resolve this issue by introducing the "end_daddr" field in
xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at
the granularity of sector. If the current query is the last, the rec_daddr
is end_daddr to prevent missing interval problems caused by shifting. We
only need to focus on the last query, because xfs disks are internally
aligned with disk blocksize that are powers of two and minimum 512, so
there is no problem with shifting in previous queries.
After applying this patch, the above problem has been solved:
[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 253:16 [104..106]: free space 0 (104..106) 3
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: limit the range of end_addr correctly]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-08-23 00:00:35 +00:00
|
|
|
info.end_daddr = min(handlers[i].nr_sectors - 1,
|
|
|
|
dkeys[1].fmr_physical);
|
|
|
|
}
|
2017-03-28 21:56:37 +00:00
|
|
|
if (handlers[i].dev > head->fmh_keys[0].fmr_device)
|
|
|
|
memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
|
|
|
|
|
2021-03-22 16:51:50 +00:00
|
|
|
/*
|
|
|
|
* Grab an empty transaction so that we can use its recursive
|
|
|
|
* buffer locking abilities to detect cycles in the rmapbt
|
|
|
|
* without deadlocking.
|
|
|
|
*/
|
2017-03-28 21:56:37 +00:00
|
|
|
error = xfs_trans_alloc_empty(mp, &tp);
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
|
|
|
|
info.dev = handlers[i].dev;
|
|
|
|
info.last = false;
|
2024-11-04 04:18:45 +00:00
|
|
|
info.group = NULL;
|
2024-08-23 00:00:20 +00:00
|
|
|
info.low_daddr = XFS_BUF_DADDR_NULL;
|
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries:
# xfs_io -c 'fsmap -vvvv' /opt
EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL
0: 8:80 [0..7]: static fs metadata 0 (0..7) 8
<snip>
9: 8:80 [192..223]: 137 0..31 0 (192..223) 32
# xfs_io -c 'fsmap -vvvv -d 208 208' /opt
#
That's not right -- we asked what block maps block 208, and we should've
received a mapping for inode 137 offset 16. Instead, we get nothing.
The root cause of this problem is a mis-interaction between the fsmap
code and how btree ranged queries work. xfs_btree_query_range returns
any btree record that overlaps with the query interval, even if the
record starts before or ends after the interval. Similarly, GETFSMAP is
supposed to return a recordset containing all records that overlap the
range queried.
However, it's possible that the recordset is larger than the buffer that
the caller provided to convey mappings to userspace. In /that/ case,
userspace is supposed to copy the last record returned to fmh_keys[0]
and call GETFSMAP again. In this case, we do not want to return
mappings that we have already supplied to the caller. The call to
xfs_btree_query_range is the same, but now we ignore any records that
start before fmh_keys[0].
Unfortunately, we didn't implement the filtering predicate correctly.
The predicate should only be called when we're calling back for more
records. Accomplish this by setting info->low.rm_blockcount to a
nonzero value and ensuring that it is cleared as necessary. As a
result, we no longer want to adjust dkeys[0] in the main setup function
because that's confusing.
This patch doesn't touch the logdev/rtbitmap backends because they have
bigger problems that will be addressed by subsequent patches.
Found via xfs/556 with parent pointers enabled.
Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-06-30 00:39:43 +00:00
|
|
|
info.low.rm_blockcount = 0;
|
2017-03-28 21:56:37 +00:00
|
|
|
error = handlers[i].fn(tp, dkeys, &info);
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
xfs_trans_cancel(tp);
|
|
|
|
tp = NULL;
|
|
|
|
info.next_daddr = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tp)
|
|
|
|
xfs_trans_cancel(tp);
|
|
|
|
head->fmh_oflags = FMH_OF_DEV_T;
|
|
|
|
return error;
|
|
|
|
}
|
2024-08-30 22:37:16 +00:00
|
|
|
|
|
|
|
int
|
|
|
|
xfs_ioc_getfsmap(
|
|
|
|
struct xfs_inode *ip,
|
|
|
|
struct fsmap_head __user *arg)
|
|
|
|
{
|
|
|
|
struct xfs_fsmap_head xhead = {0};
|
|
|
|
struct fsmap_head head;
|
|
|
|
struct fsmap *recs;
|
|
|
|
unsigned int count;
|
|
|
|
__u32 last_flags = 0;
|
|
|
|
bool done = false;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
|
|
|
|
return -EFAULT;
|
|
|
|
if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
|
|
|
|
memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
|
|
|
|
sizeof(head.fmh_keys[0].fmr_reserved)) ||
|
|
|
|
memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
|
|
|
|
sizeof(head.fmh_keys[1].fmr_reserved)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use an internal memory buffer so that we don't have to copy fsmap
|
|
|
|
* data to userspace while holding locks. Start by trying to allocate
|
|
|
|
* up to 128k for the buffer, but fall back to a single page if needed.
|
|
|
|
*/
|
|
|
|
count = min_t(unsigned int, head.fmh_count,
|
|
|
|
131072 / sizeof(struct fsmap));
|
|
|
|
recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
|
|
|
|
if (!recs) {
|
|
|
|
count = min_t(unsigned int, head.fmh_count,
|
|
|
|
PAGE_SIZE / sizeof(struct fsmap));
|
|
|
|
recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
|
|
|
|
if (!recs)
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
xhead.fmh_iflags = head.fmh_iflags;
|
|
|
|
xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
|
|
|
|
xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
|
|
|
|
|
|
|
|
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
|
|
|
|
trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
|
|
|
|
|
|
|
|
head.fmh_entries = 0;
|
|
|
|
do {
|
|
|
|
struct fsmap __user *user_recs;
|
|
|
|
struct fsmap *last_rec;
|
|
|
|
|
|
|
|
user_recs = &arg->fmh_recs[head.fmh_entries];
|
|
|
|
xhead.fmh_entries = 0;
|
|
|
|
xhead.fmh_count = min_t(unsigned int, count,
|
|
|
|
head.fmh_count - head.fmh_entries);
|
|
|
|
|
|
|
|
/* Run query, record how many entries we got. */
|
|
|
|
error = xfs_getfsmap(ip->i_mount, &xhead, recs);
|
|
|
|
switch (error) {
|
|
|
|
case 0:
|
|
|
|
/*
|
|
|
|
* There are no more records in the result set. Copy
|
|
|
|
* whatever we got to userspace and break out.
|
|
|
|
*/
|
|
|
|
done = true;
|
|
|
|
break;
|
|
|
|
case -ECANCELED:
|
|
|
|
/*
|
|
|
|
* The internal memory buffer is full. Copy whatever
|
|
|
|
* records we got to userspace and go again if we have
|
|
|
|
* not yet filled the userspace buffer.
|
|
|
|
*/
|
|
|
|
error = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto out_free;
|
|
|
|
}
|
|
|
|
head.fmh_entries += xhead.fmh_entries;
|
|
|
|
head.fmh_oflags = xhead.fmh_oflags;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the caller wanted a record count or there aren't any
|
|
|
|
* new records to return, we're done.
|
|
|
|
*/
|
|
|
|
if (head.fmh_count == 0 || xhead.fmh_entries == 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Copy all the records we got out to userspace. */
|
|
|
|
if (copy_to_user(user_recs, recs,
|
|
|
|
xhead.fmh_entries * sizeof(struct fsmap))) {
|
|
|
|
error = -EFAULT;
|
|
|
|
goto out_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remember the last record flags we copied to userspace. */
|
|
|
|
last_rec = &recs[xhead.fmh_entries - 1];
|
|
|
|
last_flags = last_rec->fmr_flags;
|
|
|
|
|
|
|
|
/* Set up the low key for the next iteration. */
|
|
|
|
xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
|
|
|
|
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
|
|
|
|
} while (!done && head.fmh_entries < head.fmh_count);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there are no more records in the query result set and we're not
|
|
|
|
* in counting mode, mark the last record returned with the LAST flag.
|
|
|
|
*/
|
|
|
|
if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
|
|
|
|
struct fsmap __user *user_rec;
|
|
|
|
|
|
|
|
last_flags |= FMR_OF_LAST;
|
|
|
|
user_rec = &arg->fmh_recs[head.fmh_entries - 1];
|
|
|
|
|
|
|
|
if (copy_to_user(&user_rec->fmr_flags, &last_flags,
|
|
|
|
sizeof(last_flags))) {
|
|
|
|
error = -EFAULT;
|
|
|
|
goto out_free;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* copy back header */
|
|
|
|
if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
|
|
|
|
error = -EFAULT;
|
|
|
|
goto out_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
out_free:
|
|
|
|
kvfree(recs);
|
|
|
|
return error;
|
|
|
|
}
|