mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 12:42:02 +00:00
a9ce385344
dm looks up the table for IO based on the request type, with an
assumption that if the request is marked REQ_NOWAIT, it's fine to
attempt to submit that IO while under RCU read lock protection. This
is not OK: REQ_NOWAIT only means that we should not sleep waiting on
other IO; it does not mean that the submission path will never
schedule.
A simple test case demonstrates this quite nicely:
int main(int argc, char *argv[])
{
struct iovec iov;
int fd;
fd = open("/dev/dm-0", O_RDONLY | O_DIRECT);
posix_memalign(&iov.iov_base, 4096, 4096);
iov.iov_len = 4096;
preadv2(fd, &iov, 1, 0, RWF_NOWAIT);
return 0;
}
which will instantly spew:
BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306
in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 5580, name: dm-nowait
preempt_count: 0, expected: 0
RCU nest depth: 1, expected: 0
INFO: lockdep is turned off.
CPU: 7 PID: 5580 Comm: dm-nowait Not tainted 6.6.0-rc1-g39956d2dcd81 #132
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x11d/0x1b0
__might_resched+0x3c3/0x5e0
? preempt_count_sub+0x150/0x150
mempool_alloc+0x1e2/0x390
? mempool_resize+0x7d0/0x7d0
? lock_sync+0x190/0x190
? lock_release+0x4b7/0x670
? internal_get_user_pages_fast+0x868/0x2d40
bio_alloc_bioset+0x417/0x8c0
? bvec_alloc+0x200/0x200
? internal_get_user_pages_fast+0xb8c/0x2d40
bio_alloc_clone+0x53/0x100
dm_submit_bio+0x27f/0x1a20
? lock_release+0x4b7/0x670
? blk_try_enter_queue+0x1a0/0x4d0
? dm_dax_direct_access+0x260/0x260
? rcu_is_watching+0x12/0xb0
? blk_try_enter_queue+0x1cc/0x4d0
__submit_bio+0x239/0x310
? __bio_queue_enter+0x700/0x700
? kvm_clock_get_cycles+0x40/0x60
? ktime_get+0x285/0x470
submit_bio_noacct_nocheck+0x4d9/0xb80
? should_fail_request+0x80/0x80
? preempt_count_sub+0x150/0x150
? lock_release+0x4b7/0x670
? __bio_add_page+0x143/0x2d0
? iov_iter_revert+0x27/0x360
submit_bio_noacct+0x53e/0x1b30
submit_bio_wait+0x10a/0x230
? submit_bio_wait_endio+0x40/0x40
__blkdev_direct_IO_simple+0x4f8/0x780
? blkdev_bio_end_io+0x4c0/0x4c0
? stack_trace_save+0x90/0xc0
? __bio_clone+0x3c0/0x3c0
? lock_release+0x4b7/0x670
? lock_sync+0x190/0x190
? atime_needs_update+0x3bf/0x7e0
? timestamp_truncate+0x21b/0x2d0
? inode_owner_or_capable+0x240/0x240
blkdev_direct_IO.part.0+0x84a/0x1810
? rcu_is_watching+0x12/0xb0
? lock_release+0x4b7/0x670
? blkdev_read_iter+0x40d/0x530
? reacquire_held_locks+0x4e0/0x4e0
? __blkdev_direct_IO_simple+0x780/0x780
? rcu_is_watching+0x12/0xb0
? __mark_inode_dirty+0x297/0xd50
? preempt_count_add+0x72/0x140
blkdev_read_iter+0x2a4/0x530
do_iter_readv_writev+0x2f2/0x3c0
? generic_copy_file_range+0x1d0/0x1d0
? fsnotify_perm.part.0+0x25d/0x630
? security_file_permission+0xd8/0x100
do_iter_read+0x31b/0x880
? import_iovec+0x10b/0x140
vfs_readv+0x12d/0x1a0
? vfs_iter_read+0xb0/0xb0
? rcu_is_watching+0x12/0xb0
? rcu_is_watching+0x12/0xb0
? lock_release+0x4b7/0x670
do_preadv+0x1b3/0x260
? do_readv+0x370/0x370
__x64_sys_preadv2+0xef/0x150
do_syscall_64+0x39/0xb0
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f5af41ad806
Code: 41 54 41 89 fc 55 44 89 c5 53 48 89 cb 48 83 ec 18 80 3d e4 dd 0d 00 00 74 7a 45 89 c1 49 89 ca 45 31 c0 b8 47 01 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 be 00 00 00 48 85 c0 79 4a 48 8b 0d da 55
RSP: 002b:00007ffd3145c7f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000147
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5af41ad806
RDX: 0000000000000001 RSI: 00007ffd3145c850 RDI: 0000000000000003
RBP: 0000000000000008 R08: 0000000000000000 R09: 0000000000000008
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003
R13: 00007ffd3145c850 R14: 000055f5f0431dd8 R15: 0000000000000001
</TASK>
where in fact it is dm itself that attempts to allocate a bio clone with
GFP_NOIO under the rcu read lock, regardless of the request type.
Fix this by getting rid of the special casing for REQ_NOWAIT, and just
use the normal SRCU protected table lookup. Get rid of the bio based
table locking helpers at the same time, as they are now unused.
Cc: stable@vger.kernel.org
Fixes:
Name | ||
---|---|---|
.. | ||
bcache | ||
persistent-data | ||
dm-audit.c | ||
dm-audit.h | ||
dm-bio-prison-v1.c | ||
dm-bio-prison-v1.h | ||
dm-bio-prison-v2.c | ||
dm-bio-prison-v2.h | ||
dm-bio-record.h | ||
dm-bufio.c | ||
dm-builtin.c | ||
dm-cache-background-tracker.c | ||
dm-cache-background-tracker.h | ||
dm-cache-block-types.h | ||
dm-cache-metadata.c | ||
dm-cache-metadata.h | ||
dm-cache-policy-internal.h | ||
dm-cache-policy-smq.c | ||
dm-cache-policy.c | ||
dm-cache-policy.h | ||
dm-cache-target.c | ||
dm-clone-metadata.c | ||
dm-clone-metadata.h | ||
dm-clone-target.c | ||
dm-core.h | ||
dm-crypt.c | ||
dm-delay.c | ||
dm-dust.c | ||
dm-ebs-target.c | ||
dm-era-target.c | ||
dm-exception-store.c | ||
dm-exception-store.h | ||
dm-flakey.c | ||
dm-ima.c | ||
dm-ima.h | ||
dm-init.c | ||
dm-integrity.c | ||
dm-io-rewind.c | ||
dm-io-tracker.h | ||
dm-io.c | ||
dm-ioctl.c | ||
dm-kcopyd.c | ||
dm-linear.c | ||
dm-log-userspace-base.c | ||
dm-log-userspace-transfer.c | ||
dm-log-userspace-transfer.h | ||
dm-log-writes.c | ||
dm-log.c | ||
dm-mpath.c | ||
dm-mpath.h | ||
dm-path-selector.c | ||
dm-path-selector.h | ||
dm-ps-historical-service-time.c | ||
dm-ps-io-affinity.c | ||
dm-ps-queue-length.c | ||
dm-ps-round-robin.c | ||
dm-ps-service-time.c | ||
dm-raid1.c | ||
dm-raid.c | ||
dm-region-hash.c | ||
dm-rq.c | ||
dm-rq.h | ||
dm-snap-persistent.c | ||
dm-snap-transient.c | ||
dm-snap.c | ||
dm-stats.c | ||
dm-stats.h | ||
dm-stripe.c | ||
dm-switch.c | ||
dm-sysfs.c | ||
dm-table.c | ||
dm-target.c | ||
dm-thin-metadata.c | ||
dm-thin-metadata.h | ||
dm-thin.c | ||
dm-uevent.c | ||
dm-uevent.h | ||
dm-unstripe.c | ||
dm-verity-fec.c | ||
dm-verity-fec.h | ||
dm-verity-loadpin.c | ||
dm-verity-target.c | ||
dm-verity-verify-sig.c | ||
dm-verity-verify-sig.h | ||
dm-verity.h | ||
dm-writecache.c | ||
dm-zero.c | ||
dm-zone.c | ||
dm-zoned-metadata.c | ||
dm-zoned-reclaim.c | ||
dm-zoned-target.c | ||
dm-zoned.h | ||
dm.c | ||
dm.h | ||
Kconfig | ||
Makefile | ||
md-autodetect.c | ||
md-bitmap.c | ||
md-bitmap.h | ||
md-cluster.c | ||
md-cluster.h | ||
md-faulty.c | ||
md-linear.c | ||
md-linear.h | ||
md-multipath.c | ||
md-multipath.h | ||
md.c | ||
md.h | ||
raid0.c | ||
raid0.h | ||
raid1-10.c | ||
raid1.c | ||
raid1.h | ||
raid5-cache.c | ||
raid5-log.h | ||
raid5-ppl.c | ||
raid5.c | ||
raid5.h | ||
raid10.c | ||
raid10.h |