forked from Minki/linux
e274832590
In null_init_zoned_dev(), check whether the zone size is larger than the device capacity and return an error if it is. This also fixes the following oops: null_blk: changed the number of conventional zones to 4294967295 BUG: kernel NULL pointer dereference, address: 0000000000000010 PGD 7d76c5067 P4D 7d76c5067 PUD 7d240c067 PMD 0 Oops: 0002 [#1] SMP NOPTI CPU: 4 PID: 5508 Comm: nullbtests.sh Tainted: G OE 5.7.0-rc4lblk-fnext0 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e4 RIP: 0010:null_init_zoned_dev+0x17a/0x27f [null_blk] RSP: 0018:ffffc90007007e00 EFLAGS: 00010246 RAX: 0000000000000020 RBX: ffff8887fb3f3c00 RCX: 0000000000000007 RDX: 0000000000000000 RSI: ffff8887ca09d688 RDI: ffff888810fea510 RBP: 0000000000000010 R08: ffff8887ca09d688 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8887c26e8000 R13: ffffffffa05e9390 R14: 0000000000000000 R15: 0000000000000001 FS: 00007fcb5256f740(0000) GS:ffff888810e00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000081e8fe000 CR4: 00000000003406e0 Call Trace: null_add_dev+0x534/0x71b [null_blk] nullb_device_power_store.cold.41+0x8/0x2e [null_blk] configfs_write_file+0xe6/0x150 vfs_write+0xba/0x1e0 ksys_write+0x5f/0xe0 do_syscall_64+0x60/0x250 entry_SYSCALL_64_after_hwframe+0x49/0xb3 RIP: 0033:0x7fcb51c71840 Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
260 lines
6.5 KiB
C
260 lines
6.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/vmalloc.h>
|
|
#include "null_blk.h"
|
|
|
|
#define CREATE_TRACE_POINTS
|
|
#include "null_blk_trace.h"
|
|
|
|
/* Left shift that converts a zone size given in MB into 512-byte sectors (1 MB = 2048 sectors). */
|
|
#define ZONE_SIZE_SHIFT 11
|
|
|
|
/*
 * Map a sector to the index of the zone that contains it.
 *
 * The per-zone sector count is treated as a power of two, so the division
 * by the zone size is carried out as a right shift by its base-2 logarithm.
 */
static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
{
	const unsigned int zone_shift = ilog2(dev->zone_size_sects);

	return sect >> zone_shift;
}
|
|
|
|
int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
|
|
{
|
|
sector_t dev_size = (sector_t)dev->size * 1024 * 1024;
|
|
sector_t sector = 0;
|
|
unsigned int i;
|
|
|
|
if (!is_power_of_2(dev->zone_size)) {
|
|
pr_err("zone_size must be power-of-two\n");
|
|
return -EINVAL;
|
|
}
|
|
if (dev->zone_size > dev->size) {
|
|
pr_err("Zone size larger than device capacity\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
|
|
dev->nr_zones = dev_size >>
|
|
(SECTOR_SHIFT + ilog2(dev->zone_size_sects));
|
|
dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone),
|
|
GFP_KERNEL | __GFP_ZERO);
|
|
if (!dev->zones)
|
|
return -ENOMEM;
|
|
|
|
if (dev->zone_nr_conv >= dev->nr_zones) {
|
|
dev->zone_nr_conv = dev->nr_zones - 1;
|
|
pr_info("changed the number of conventional zones to %u",
|
|
dev->zone_nr_conv);
|
|
}
|
|
|
|
for (i = 0; i < dev->zone_nr_conv; i++) {
|
|
struct blk_zone *zone = &dev->zones[i];
|
|
|
|
zone->start = sector;
|
|
zone->len = dev->zone_size_sects;
|
|
zone->wp = zone->start + zone->len;
|
|
zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
|
|
zone->cond = BLK_ZONE_COND_NOT_WP;
|
|
|
|
sector += dev->zone_size_sects;
|
|
}
|
|
|
|
for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
|
|
struct blk_zone *zone = &dev->zones[i];
|
|
|
|
zone->start = zone->wp = sector;
|
|
zone->len = dev->zone_size_sects;
|
|
zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
|
|
zone->cond = BLK_ZONE_COND_EMPTY;
|
|
|
|
sector += dev->zone_size_sects;
|
|
}
|
|
|
|
q->limits.zoned = BLK_ZONED_HM;
|
|
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
|
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int null_register_zoned_dev(struct nullb *nullb)
|
|
{
|
|
struct request_queue *q = nullb->q;
|
|
|
|
if (queue_is_mq(q))
|
|
return blk_revalidate_disk_zones(nullb->disk);
|
|
|
|
blk_queue_chunk_sectors(q, nullb->dev->zone_size_sects);
|
|
q->nr_zones = blkdev_nr_zones(nullb->disk);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void null_free_zoned_dev(struct nullb_device *dev)
|
|
{
|
|
kvfree(dev->zones);
|
|
}
|
|
|
|
/*
 * Report up to @nr_zones zones, starting with the zone containing @sector,
 * by invoking @cb once per zone.
 *
 * Returns the number of zones reported, 0 if @sector lies beyond the last
 * zone, or the first non-zero value returned by @cb.
 */
int null_report_zones(struct gendisk *disk, sector_t sector,
		unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nullb *nullb = disk->private_data;
	struct nullb_device *dev = nullb->dev;
	unsigned int start_zno = null_zone_no(dev, sector);
	unsigned int idx;

	if (start_zno >= dev->nr_zones)
		return 0;

	/* Never report past the end of the zone array. */
	nr_zones = min(nr_zones, dev->nr_zones - start_zno);
	trace_nullb_report_zones(nullb, nr_zones);

	for (idx = 0; idx < nr_zones; idx++) {
		struct blk_zone zone_copy;
		int ret;

		/*
		 * The callback gets a scratch copy: stacked DM targets remap
		 * the zone information they are handed, and that must not
		 * modify the device's own zone array.
		 */
		memcpy(&zone_copy, &dev->zones[start_zno + idx],
		       sizeof(zone_copy));
		ret = cb(&zone_copy, idx, data);
		if (ret)
			return ret;
	}

	return nr_zones;
}
|
|
|
|
size_t null_zone_valid_read_len(struct nullb *nullb,
|
|
sector_t sector, unsigned int len)
|
|
{
|
|
struct nullb_device *dev = nullb->dev;
|
|
struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
|
|
unsigned int nr_sectors = len >> SECTOR_SHIFT;
|
|
|
|
/* Read must be below the write pointer position */
|
|
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
|
|
sector + nr_sectors <= zone->wp)
|
|
return len;
|
|
|
|
if (sector > zone->wp)
|
|
return 0;
|
|
|
|
return (zone->wp - sector) << SECTOR_SHIFT;
|
|
}
|
|
|
|
/*
 * Handle a write of @nr_sectors sectors at @sector on a zoned device.
 *
 * Writes to conventional zones are passed straight to null_process_cmd().
 * For other zones the write is rejected with BLK_STS_IOERR when the zone
 * is full, is in an unexpected condition, or the write does not start at
 * the write pointer.  On success the write pointer advances and the zone
 * becomes full once the pointer reaches the end of the zone.
 */
static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
				    unsigned int nr_sectors)
{
	struct nullb_device *dev = cmd->nq->dev;
	unsigned int zno = null_zone_no(dev, sector);
	struct blk_zone *zone = &dev->zones[zno];
	blk_status_t ret;

	trace_nullb_zone_op(cmd, zno, zone->cond);

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);

	/* Only these four conditions accept writes. */
	switch (zone->cond) {
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
	case BLK_ZONE_COND_CLOSED:
		break;
	case BLK_ZONE_COND_FULL:
		/* Cannot write to a full zone */
	default:
		/* Invalid zone condition */
		return BLK_STS_IOERR;
	}

	/* Writes must be at the write pointer position */
	if (sector != zone->wp)
		return BLK_STS_IOERR;

	/* An explicitly opened zone stays so; any other becomes implicitly open. */
	if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
		zone->cond = BLK_ZONE_COND_IMP_OPEN;

	ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
	if (ret != BLK_STS_OK)
		return ret;

	zone->wp += nr_sectors;
	if (zone->wp == zone->start + zone->len)
		zone->cond = BLK_ZONE_COND_FULL;

	return BLK_STS_OK;
}
|
|
|
|
static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
|
|
sector_t sector)
|
|
{
|
|
struct nullb_device *dev = cmd->nq->dev;
|
|
unsigned int zone_no = null_zone_no(dev, sector);
|
|
struct blk_zone *zone = &dev->zones[zone_no];
|
|
size_t i;
|
|
|
|
switch (op) {
|
|
case REQ_OP_ZONE_RESET_ALL:
|
|
for (i = 0; i < dev->nr_zones; i++) {
|
|
if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
|
|
continue;
|
|
zone[i].cond = BLK_ZONE_COND_EMPTY;
|
|
zone[i].wp = zone[i].start;
|
|
}
|
|
break;
|
|
case REQ_OP_ZONE_RESET:
|
|
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
|
return BLK_STS_IOERR;
|
|
|
|
zone->cond = BLK_ZONE_COND_EMPTY;
|
|
zone->wp = zone->start;
|
|
break;
|
|
case REQ_OP_ZONE_OPEN:
|
|
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
|
return BLK_STS_IOERR;
|
|
if (zone->cond == BLK_ZONE_COND_FULL)
|
|
return BLK_STS_IOERR;
|
|
|
|
zone->cond = BLK_ZONE_COND_EXP_OPEN;
|
|
break;
|
|
case REQ_OP_ZONE_CLOSE:
|
|
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
|
return BLK_STS_IOERR;
|
|
if (zone->cond == BLK_ZONE_COND_FULL)
|
|
return BLK_STS_IOERR;
|
|
|
|
if (zone->wp == zone->start)
|
|
zone->cond = BLK_ZONE_COND_EMPTY;
|
|
else
|
|
zone->cond = BLK_ZONE_COND_CLOSED;
|
|
break;
|
|
case REQ_OP_ZONE_FINISH:
|
|
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
|
return BLK_STS_IOERR;
|
|
|
|
zone->cond = BLK_ZONE_COND_FULL;
|
|
zone->wp = zone->start + zone->len;
|
|
break;
|
|
default:
|
|
return BLK_STS_NOTSUPP;
|
|
}
|
|
|
|
trace_nullb_zone_op(cmd, zone_no, zone->cond);
|
|
return BLK_STS_OK;
|
|
}
|
|
|
|
/*
 * Dispatch a command issued to a zoned device.
 *
 * Writes go through null_zone_write(), zone reset/reset-all/open/close/
 * finish operations go through null_zone_mgmt(), and every other
 * operation is passed to null_process_cmd() unchanged.
 */
blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
				    sector_t sector, sector_t nr_sectors)
{
	if (op == REQ_OP_WRITE)
		return null_zone_write(cmd, sector, nr_sectors);

	if (op == REQ_OP_ZONE_RESET || op == REQ_OP_ZONE_RESET_ALL ||
	    op == REQ_OP_ZONE_OPEN || op == REQ_OP_ZONE_CLOSE ||
	    op == REQ_OP_ZONE_FINISH)
		return null_zone_mgmt(cmd, op, sector);

	return null_process_cmd(cmd, op, sector, nr_sectors);
}
|