diff --git a/block/bio.c b/block/bio.c
index 4f184d938942..dbabd48b1934 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1125,7 +1125,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	int i, ret;
 	int nr_pages = 0;
 	unsigned int len = iter->count;
-	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
+	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
 
 	for (i = 0; i < iter->nr_segs; i++) {
 		unsigned long uaddr;
@@ -1304,7 +1304,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
 			goto out_unmap;
 		}
 
-		offset = uaddr & ~PAGE_MASK;
+		offset = offset_in_page(uaddr);
 		for (j = cur_page; j < page_limit; j++) {
 			unsigned int bytes = PAGE_SIZE - offset;
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 33e2f62d5062..476244d59309 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -51,7 +51,7 @@ DEFINE_IDA(blk_queue_ida);
 /*
  * For the allocated request tables
  */
-struct kmem_cache *request_cachep = NULL;
+struct kmem_cache *request_cachep;
 
 /*
  * For queue allocation
@@ -646,7 +646,7 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-int blk_queue_enter(struct request_queue *q, gfp_t gfp)
+int blk_queue_enter(struct request_queue *q, bool nowait)
 {
 	while (true) {
 		int ret;
@@ -654,7 +654,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
 		if (percpu_ref_tryget_live(&q->q_usage_counter))
 			return 0;
 
-		if (!gfpflags_allow_blocking(gfp))
+		if (nowait)
 			return -EBUSY;
 
 		ret = wait_event_interruptible(q->mq_freeze_wq,
@@ -1292,7 +1292,9 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw, gfp_mask, false);
+		return blk_mq_alloc_request(q, rw,
+			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
+				0 : BLK_MQ_REQ_NOWAIT);
 	else
 		return blk_old_get_request(q, rw, gfp_mask);
 }
@@ -2060,8 +2062,7 @@ blk_qc_t generic_make_request(struct bio *bio)
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-		if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
-
+		if (likely(blk_queue_enter(q, false) == 0)) {
 			ret = q->make_request_fn(q, bio);
 
 			blk_queue_exit(q);
@@ -3534,7 +3535,7 @@ int __init blk_dev_init(void)
 	request_cachep = kmem_cache_create("blkdev_requests",
 			sizeof(struct request), 0, SLAB_PANIC, NULL);
 
-	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
+	blk_requestq_cachep = kmem_cache_create("request_queue",
 			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
 
 	return 0;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e01405a3e8b3..1699df5b0493 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -7,6 +7,8 @@
 #include <linux/blkdev.h>
 #include <linux/scatterlist.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 
 static struct bio *blk_bio_discard_split(struct request_queue *q,
@@ -81,9 +83,6 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	struct bio *new = NULL;
 
 	bio_for_each_segment(bv, bio, iter) {
-		if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
-			goto split;
-
 		/*
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
@@ -91,6 +90,22 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
 			goto split;
 
+		if (sectors + (bv.bv_len >> 9) >
+				blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+			/*
+			 * Consider this a new segment if we're splitting in
+			 * the middle of this vector.
+			 */
+			if (nsegs < queue_max_segments(q) &&
+			    sectors < blk_max_size_offset(q,
+						bio->bi_iter.bi_sector)) {
+				nsegs++;
+				sectors = blk_max_size_offset(q,
+						bio->bi_iter.bi_sector);
+			}
+			goto split;
+		}
+
 		if (bvprvp && blk_queue_cluster(q)) {
 			if (seg_size + bv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
@@ -162,6 +177,7 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
 		split->bi_rw |= REQ_NOMERGE;
 
 		bio_chain(split, *bio);
+		trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
 		generic_make_request(*bio);
 		*bio = split;
 	}
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8764c241e5bb..d0634bcf322f 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -113,7 +113,7 @@ int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index)
 
 	for_each_possible_cpu(i) {
 		if (index == mq_map[i])
-			return cpu_to_node(i);
+			return local_memory_node(cpu_to_node(i));
 	}
 
 	return NUMA_NO_NODE;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index a07ca3488d96..abdbb47405cb 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 	if (tag != -1)
 		return tag;
 
-	if (!gfpflags_allow_blocking(data->gfp))
+	if (data->flags & BLK_MQ_REQ_NOWAIT)
 		return -1;
 
 	bs = bt_wait_ptr(bt, hctx);
@@ -303,7 +303,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 		data->ctx = blk_mq_get_ctx(data->q);
 		data->hctx = data->q->mq_ops->map_queue(data->q,
 				data->ctx->cpu);
-		if (data->reserved) {
+		if (data->flags & BLK_MQ_REQ_RESERVED) {
 			bt = &data->hctx->tags->breserved_tags;
 		} else {
 			last_tag = &data->ctx->last_tag;
@@ -349,10 +349,9 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
 
 unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 {
-	if (!data->reserved)
-		return __blk_mq_get_tag(data);
-
-	return __blk_mq_get_reserved_tag(data);
+	if (data->flags & BLK_MQ_REQ_RESERVED)
+		return __blk_mq_get_reserved_tag(data);
+	return __blk_mq_get_tag(data);
 }
 
 static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6d6f8feb48c0..6889d7183a2a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -229,8 +229,8 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
 	return NULL;
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
-		bool reserved)
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+		unsigned int flags)
 {
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_hw_ctx *hctx;
@@ -238,24 +238,22 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 	struct blk_mq_alloc_data alloc_data;
 	int ret;
 
-	ret = blk_queue_enter(q, gfp);
+	ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
 	if (ret)
 		return ERR_PTR(ret);
 
 	ctx = blk_mq_get_ctx(q);
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-	blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
-			reserved, ctx, hctx);
+	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
 
 	rq = __blk_mq_alloc_request(&alloc_data, rw);
-	if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
+	if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
 		__blk_mq_run_hw_queue(hctx);
 		blk_mq_put_ctx(ctx);
 
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
-		blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx,
-				hctx);
+		blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
 		rq = __blk_mq_alloc_request(&alloc_data, rw);
 		ctx = alloc_data.ctx;
 	}
@@ -1175,8 +1173,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 		rw |= REQ_SYNC;
 
 	trace_block_getrq(q, bio, rw);
-	blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx,
-			hctx);
+	blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
 	rq = __blk_mq_alloc_request(&alloc_data, rw);
 	if (unlikely(!rq)) {
 		__blk_mq_run_hw_queue(hctx);
@@ -1185,8 +1182,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
-		blk_mq_set_alloc_data(&alloc_data, q,
-				__GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx);
+		blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
 		rq = __blk_mq_alloc_request(&alloc_data, rw);
 		ctx = alloc_data.ctx;
 		hctx = alloc_data.hctx;
@@ -1794,7 +1790,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		 * not, we remain on the home node of the device
 		 */
 		if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
-			hctx->numa_node = cpu_to_node(i);
+			hctx->numa_node = local_memory_node(cpu_to_node(i));
 	}
 }
 
@@ -1854,6 +1850,7 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 		hctx->tags = set->tags[i];
 		WARN_ON(!hctx->tags);
 
+		cpumask_copy(hctx->tags->cpumask, hctx->cpumask);
 		/*
 		 * Set the map size to the number of mapped software queues.
 		 * This is more accurate and more efficient than looping
@@ -1867,14 +1864,6 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 		hctx->next_cpu = cpumask_first(hctx->cpumask);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
-
-	queue_for_each_ctx(q, ctx, i) {
-		if (!cpumask_test_cpu(i, online_mask))
-			continue;
-
-		hctx = q->mq_ops->map_queue(q, i);
-		cpumask_set_cpu(i, hctx->tags->cpumask);
-	}
 }
 
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 713820b47b31..eaede8e45c9c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -96,8 +96,7 @@ static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
 struct blk_mq_alloc_data {
 	/* input parameter */
 	struct request_queue *q;
-	gfp_t gfp;
-	bool reserved;
+	unsigned int flags;
 
 	/* input & output parameter */
 	struct blk_mq_ctx *ctx;
@@ -105,13 +104,11 @@ struct blk_mq_alloc_data {
 };
 
 static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
-		struct request_queue *q, gfp_t gfp, bool reserved,
-		struct blk_mq_ctx *ctx,
-		struct blk_mq_hw_ctx *hctx)
+		struct request_queue *q, unsigned int flags,
+		struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
 {
 	data->q = q;
-	data->gfp = gfp;
-	data->reserved = reserved;
+	data->flags = flags;
 	data->ctx = ctx;
 	data->hctx = hctx;
 }
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index aa40aa93381b..3610af561748 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -186,6 +186,7 @@ unsigned long blk_rq_timeout(unsigned long timeout)
  * Notes:
  *    Each request has its own timer, and as it is added to the queue, we
  *    set up the timer. When the request completes, we cancel the timer.
+ *    Queue lock must be held for the non-mq case, mq case doesn't care.
  */
 void blk_add_timer(struct request *req)
 {
@@ -209,6 +210,11 @@ void blk_add_timer(struct request *req)
 		req->timeout = q->rq_timeout;
 
 	req->deadline = jiffies + req->timeout;
+
+	/*
+	 * Only the non-mq case needs to add the request to a protected list.
+	 * For the mq case we simply scan the tag map.
+	 */
 	if (!q->mq_ops)
 		list_add_tail(&req->timeout_list, &req->q->timeout_list);
 
diff --git a/block/genhd.c b/block/genhd.c
index 5aaeb2ad0fd3..9f42526b4d62 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1449,7 +1449,7 @@ static DEFINE_MUTEX(disk_events_mutex);
 static LIST_HEAD(disk_events);
 
 /* disable in-kernel polling by default */
-static unsigned long disk_events_dfl_poll_msecs = 0;
+static unsigned long disk_events_dfl_poll_msecs;
 
 static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
 {
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 34997d8ecd64..15bec407ac37 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 {
 	struct request *rq;
 
-	rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true);
+	rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED);
 	return blk_mq_rq_to_pdu(rq);
 }
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 09e3c0d87ecc..95dff91135ad 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -449,7 +449,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	struct request *rq;
 	struct bio *bio = rqd->bio;
 
-	rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+	rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
 	if (IS_ERR(rq))
 		return -ENOMEM;
 
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 60316fbaf295..9b9809b709a5 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -71,7 +71,7 @@ static int raw_open(struct inode *inode, struct file *filp)
 	err = -ENODEV;
 	if (!bdev)
 		goto out;
-	igrab(bdev->bd_inode);
+	bdgrab(bdev);
 	err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open);
 	if (err)
 		goto out;
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 15f2acb4d5cd..1af54ea20e7b 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -471,7 +471,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	struct bio *bio = rqd->bio;
 	struct nvme_nvm_command *cmd;
 
-	rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+	rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
 	if (IS_ERR(rq))
 		return -ENOMEM;
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0c67b57be83c..f5c0e2613c7c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1041,7 +1041,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	struct request *req;
 	int ret;
 
-	req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
+	req = blk_mq_alloc_request(q, write, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1094,7 +1094,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	struct nvme_cmd_info *cmd_info;
 	struct request *req;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
+	req = blk_mq_alloc_request(dev->admin_q, WRITE,
+			BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1119,7 +1120,7 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
 	struct request *req;
 	struct nvme_cmd_info *cmd_rq;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false);
+	req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1320,8 +1321,8 @@ static void nvme_abort_req(struct request *req)
 	if (!dev->abort_limit)
 		return;
 
-	abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC,
-						false);
+	abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
+						BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(abort_req))
 		return;
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 530145b607c4..ba762ea07f67 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -400,7 +400,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
 	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return result;
 
-	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+	result = blk_queue_enter(bdev->bd_queue, false);
 	if (result)
 		return result;
 	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
@@ -437,7 +437,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 
 	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return -EOPNOTSUPP;
-	result = blk_queue_enter(bdev->bd_queue, GFP_NOIO);
+	result = blk_queue_enter(bdev->bd_queue, false);
 	if (result)
 		return result;
 
@@ -700,7 +700,7 @@ static struct block_device *bd_acquire(struct inode *inode)
 	spin_lock(&bdev_lock);
 	bdev = inode->i_bdev;
 	if (bdev) {
-		ihold(bdev->bd_inode);
+		bdgrab(bdev);
 		spin_unlock(&bdev_lock);
 		return bdev;
 	}
@@ -716,7 +716,7 @@ static struct block_device *bd_acquire(struct inode *inode)
 		 * So, we can access it via ->i_mapping always
 		 * without igrab().
 		 */
-		ihold(bdev->bd_inode);
+		bdgrab(bdev);
 		inode->i_bdev = bdev;
 		inode->i_mapping = bdev->bd_inode->i_mapping;
 		list_add(&inode->i_devices, &bdev->bd_inodes);
@@ -739,7 +739,7 @@ void bd_forget(struct inode *inode)
 	spin_unlock(&bdev_lock);
 
 	if (bdev)
-		iput(bdev->bd_inode);
+		bdput(bdev);
 }
 
 /**
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index daf17d70aeca..7fc9296b5742 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -188,8 +188,14 @@ void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_free_request(struct request *rq);
 void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
+
+enum {
+	BLK_MQ_REQ_NOWAIT	= (1 << 0), /* return when out of requests */
+	BLK_MQ_REQ_RESERVED	= (1 << 1), /* allocate from reserved pool */
+};
+
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
-		gfp_t gfp, bool reserved);
+		unsigned int flags);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bfb64d672e19..d372ea87ead5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -795,7 +795,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
			 struct scsi_ioctl_command __user *);
 
-extern int blk_queue_enter(struct request_queue *q, gfp_t gfp);
+extern int blk_queue_enter(struct request_queue *q, bool nowait);
 extern void blk_queue_exit(struct request_queue *q);
 extern void blk_start_queue(struct request_queue *q);
 extern void blk_start_queue_async(struct request_queue *q);
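--

Usage note: the core API change in this diff replaces blk_mq_alloc_request()'s
"gfp_t gfp, bool reserved" pair with a single flags word. Below is a minimal
caller-side sketch of the new convention; it is illustrative only (the function
name demo_alloc_rq and the atomic-context parameter are not part of the patch)
and assumes a live mq-enabled request_queue:

	#include <linux/blk-mq.h>
	#include <linux/blkdev.h>
	#include <linux/err.h>
	#include <linux/printk.h>

	/*
	 * Passing 0 for flags sleeps until a tag becomes available;
	 * BLK_MQ_REQ_NOWAIT fails fast instead of sleeping, and
	 * BLK_MQ_REQ_RESERVED draws from the reserved tag pool set aside
	 * at tag-set creation time. Failure is reported via ERR_PTR(),
	 * not NULL, so callers must test with IS_ERR().
	 */
	static struct request *demo_alloc_rq(struct request_queue *q,
					     bool in_atomic_ctx)
	{
		struct request *rq;

		rq = blk_mq_alloc_request(q, WRITE,
				in_atomic_ctx ? BLK_MQ_REQ_NOWAIT : 0);
		if (IS_ERR(rq))
			pr_warn("request allocation failed: %ld\n",
				PTR_ERR(rq));
		return rq;
	}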