linux/drivers/block/rnbd/rnbd-proto.h

293 lines
7.1 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* RDMA Network Block Driver
*
* Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
* Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
* Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
*/
#ifndef RNBD_PROTO_H
#define RNBD_PROTO_H
#include <linux/types.h>
#include <linux/blk-mq.h>
#include <linux/limits.h>
#include <linux/inet.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <rdma/ib.h>
/* Major and minor version numbers of the RNBD protocol. */
#define RNBD_PROTO_VER_MAJOR 2
#define RNBD_PROTO_VER_MINOR 0
/* The default port number the RTRS server is listening on. */
#define RTRS_PORT 1234
/**
 * enum rnbd_msg_type - RNBD message types
 * @RNBD_MSG_SESS_INFO:		initial session info from client to server
 * @RNBD_MSG_SESS_INFO_RSP:	initial session info from server to client
 * @RNBD_MSG_OPEN:		open (map) device request
 * @RNBD_MSG_OPEN_RSP:		response to an @RNBD_MSG_OPEN
 * @RNBD_MSG_IO:		block IO request operation
 * @RNBD_MSG_CLOSE:		close (unmap) device request
 *
 * The numeric values are spelled out so that the numbering stays visible and
 * cannot shift silently when an entry is inserted.
 */
enum rnbd_msg_type {
	RNBD_MSG_SESS_INFO	= 0,
	RNBD_MSG_SESS_INFO_RSP	= 1,
	RNBD_MSG_OPEN		= 2,
	RNBD_MSG_OPEN_RSP	= 3,
	RNBD_MSG_IO		= 4,
	RNBD_MSG_CLOSE		= 5,
};
/**
 * struct rnbd_msg_hdr - header of RNBD messages
 * @type:	message type, valid values see: enum rnbd_msg_type
 * @__padding:	padding after @type; fills the header up to two 16-bit fields
 */
struct rnbd_msg_hdr {
	__le16		type;
	__le16		__padding;
};
/**
 * enum rnbd_access_mode - mode in which a remote device is mapped
 * @RNBD_ACCESS_RO:		read-only; a device may be mapped read-only many
 *				times
 * @RNBD_ACCESS_RW:		read-write; a device may be mapped read-write
 *				only once
 * @RNBD_ACCESS_MIGRATION:	allows one additional read-write mapping of a
 *				device (a second RW export can be required,
 *				for example, for VM migration)
 *
 * The values double as indices into rnbd_access_modes[], so they are written
 * out explicitly.
 */
enum rnbd_access_mode {
	RNBD_ACCESS_RO		= 0,
	RNBD_ACCESS_RW		= 1,
	RNBD_ACCESS_MIGRATION	= 2,
};
/*
 * Lookup table pairing every access mode with its textual name. Entries are
 * placed at the index equal to their enum rnbd_access_mode value.
 */
static const __maybe_unused struct {
	enum rnbd_access_mode	mode;
	const char		*str;
} rnbd_access_modes[] = {
	[RNBD_ACCESS_RO] = {
		.mode	= RNBD_ACCESS_RO,
		.str	= "ro",
	},
	[RNBD_ACCESS_RW] = {
		.mode	= RNBD_ACCESS_RW,
		.str	= "rw",
	},
	[RNBD_ACCESS_MIGRATION] = {
		.mode	= RNBD_ACCESS_MIGRATION,
		.str	= "migration",
	},
};
/**
 * struct rnbd_msg_sess_info - initial session info from client to server
 * @hdr:	message header
 * @ver:	RNBD protocol version
 * @reserved:	reserved bytes following @ver
 */
struct rnbd_msg_sess_info {
	struct rnbd_msg_hdr	hdr;
	u8			ver;
	u8			reserved[31];
};
/**
 * struct rnbd_msg_sess_info_rsp - initial session info from server to client
 * @hdr:	message header
 * @ver:	RNBD protocol version
 * @reserved:	reserved bytes following @ver
 */
struct rnbd_msg_sess_info_rsp {
	struct rnbd_msg_hdr	hdr;
	u8			ver;
	u8			reserved[31];
};
/**
 * struct rnbd_msg_open - request to open a remote device.
 * @hdr:		message header
 * @access_mode:	the mode to open remote device, valid values see:
 *			enum rnbd_access_mode
 * @resv1:		reserved byte
 * @dev_name:		device path on remote side
 * @reserved:		reserved bytes at the end of the message
 */
struct rnbd_msg_open {
	struct rnbd_msg_hdr	hdr;
	u8			access_mode;
	u8			resv1;
	s8			dev_name[NAME_MAX];
	u8			reserved[3];
};
/**
 * struct rnbd_msg_close - request to close a remote device.
 * @hdr:	message header
 * @device_id:	device_id on server side to identify the device
 */
struct rnbd_msg_close {
	struct rnbd_msg_hdr	hdr;
	__le32			device_id;
};
/*
 * Cache policy bits. Each enumerator occupies its own bit, so the values can
 * be OR-ed together.
 */
enum rnbd_cache_policy {
	RNBD_FUA	= 0x01,
	RNBD_WRITEBACK	= 0x02,
};
/**
 * struct rnbd_msg_open_rsp - response message to RNBD_MSG_OPEN
 * @hdr:			message header
 * @device_id:			device_id on server side to identify the device
 * @nsectors:			number of sectors in the usual 512b unit
 * @max_hw_sectors:		max hardware sectors in the usual 512b unit
 * @max_write_same_sectors:	max sectors for WRITE SAME in the 512b unit
 * @max_discard_sectors:	max sectors that can be discarded at once, in
 *				the 512b unit
 * @discard_granularity:	size of the internal discard allocation unit in
 *				bytes
 * @discard_alignment:		offset from internal allocation assignment in
 *				bytes
 * @physical_block_size:	physical block size the device supports, in
 *				bytes
 * @logical_block_size:		logical block size the device supports, in
 *				bytes
 * @max_segments:		max segments the hardware supports in one
 *				transfer
 * @secure_discard:		supports secure discard
 * @obsolete_rotational:	obsolete, no longer used
 * @cache_policy:		support for write-back caching or FUA
 *				(presumably enum rnbd_cache_policy bits;
 *				confirm against the users of this field)
 * @reserved:			reserved bytes at the end of the message
 */
struct rnbd_msg_open_rsp {
	struct rnbd_msg_hdr	hdr;
	__le32			device_id;
	__le64			nsectors;
	__le32			max_hw_sectors;
	__le32			max_write_same_sectors;
	__le32			max_discard_sectors;
	__le32			discard_granularity;
	__le32			discard_alignment;
	__le16			physical_block_size;
	__le16			logical_block_size;
	__le16			max_segments;
	__le16			secure_discard;
	u8			obsolete_rotational;
	u8			cache_policy;
	u8			reserved[10];
};
/**
 * struct rnbd_msg_io - message for I/O read/write
 * @hdr:	message header
 * @device_id:	device_id on server side to find the right device
 * @sector:	bi_sector attribute from struct bio
 * @rw:		operation and flags, valid values are defined in
 *		enum rnbd_io_flags
 * @bi_size:	number of bytes for I/O read/write
 * @prio:	priority
 */
struct rnbd_msg_io {
	struct rnbd_msg_hdr	hdr;
	__le32			device_id;
	__le64			sector;
	__le32			rw;
	__le32			bi_size;
	__le16			prio;
};
/*
 * The low RNBD_OP_BITS bits of an RNBD op/flags word hold the operation;
 * RNBD_OP_MASK selects exactly those bits. Flag bits start right above them.
 */
#define RNBD_OP_BITS 8
#define RNBD_OP_MASK ((1 << RNBD_OP_BITS) - 1)

/**
 * enum rnbd_io_flags - RNBD request operations and request flags
 * @RNBD_OP_READ:		read sectors from the device
 * @RNBD_OP_WRITE:		write sectors to the device
 * @RNBD_OP_FLUSH:		flush the volatile write cache
 * @RNBD_OP_DISCARD:		discard sectors
 * @RNBD_OP_SECURE_ERASE:	securely erase sectors
 * @RNBD_OP_WRITE_SAME:		write the same sectors many times
 * @RNBD_F_SYNC:		request is sync (sync write or read)
 * @RNBD_F_FUA:			forced unit access
 */
enum rnbd_io_flags {
	/* Operations: stored in the bits covered by RNBD_OP_MASK */
	RNBD_OP_READ		= 0,
	RNBD_OP_WRITE		= 1,
	RNBD_OP_FLUSH		= 2,
	RNBD_OP_DISCARD		= 3,
	RNBD_OP_SECURE_ERASE	= 4,
	RNBD_OP_WRITE_SAME	= 5,

	/* Flags: one bit each, counted from the first bit above the op field */
	RNBD_F_SYNC		= (1 << RNBD_OP_BITS) << 0,
	RNBD_F_FUA		= (1 << RNBD_OP_BITS) << 1,
};
/**
 * rnbd_op() - extract the operation from an RNBD op/flags word
 * @flags: combined operation and flag bits
 *
 * Return: the bits of @flags selected by RNBD_OP_MASK.
 */
static inline u32 rnbd_op(u32 flags)
{
	const u32 op_field = RNBD_OP_MASK;

	return flags & op_field;
}
/**
 * rnbd_flags() - extract the flag bits from an RNBD op/flags word
 * @flags: combined operation and flag bits
 *
 * Return: @flags with the bits selected by RNBD_OP_MASK cleared.
 */
static inline u32 rnbd_flags(u32 flags)
{
	const u32 op_field = RNBD_OP_MASK;

	return flags & ~op_field;
}
/**
 * rnbd_to_bio_flags() - translate an RNBD op/flags word into bio op flags
 * @rnbd_opf: operation and flag bits, see enum rnbd_io_flags
 *
 * The operation part of @rnbd_opf (see rnbd_op()) selects the REQ_OP_* value;
 * RNBD_F_SYNC and RNBD_F_FUA are then mapped to REQ_SYNC and REQ_FUA.
 *
 * Operations without a mapping here (including RNBD_OP_WRITE_SAME) trigger a
 * WARN and yield an operation value of 0, with the flag mapping still applied.
 *
 * Return: the resulting blk_opf_t.
 */
static inline blk_opf_t rnbd_to_bio_flags(u32 rnbd_opf)
{
	blk_opf_t bio_opf;

	switch (rnbd_op(rnbd_opf)) {
	case RNBD_OP_READ:
		bio_opf = REQ_OP_READ;
		break;
	case RNBD_OP_WRITE:
		bio_opf = REQ_OP_WRITE;
		break;
	case RNBD_OP_FLUSH:
		/*
		 * Flush bios are implemented as writes with no data and the
		 * preflush flag. Using REQ_OP_FLUSH here made
		 * submit_bio_noacct() emit a warning on the server side.
		 */
		bio_opf = REQ_OP_WRITE | REQ_PREFLUSH;
		break;
	case RNBD_OP_DISCARD:
		bio_opf = REQ_OP_DISCARD;
		break;
	case RNBD_OP_SECURE_ERASE:
		bio_opf = REQ_OP_SECURE_ERASE;
		break;
	default:
		/* Both arguments are u32, hence %u rather than %d. */
		WARN(1, "Unknown RNBD type: %u (flags %u)\n",
		     rnbd_op(rnbd_opf), rnbd_opf);
		bio_opf = 0;
		break;
	}

	if (rnbd_opf & RNBD_F_SYNC)
		bio_opf |= REQ_SYNC;

	if (rnbd_opf & RNBD_F_FUA)
		bio_opf |= REQ_FUA;

	return bio_opf;
}
/**
 * rq_to_rnbd_flags() - encode a request's operation and flags for RNBD
 * @rq: block request whose operation and cmd_flags are translated
 *
 * The request operation is mapped to the matching RNBD_OP_* value. RNBD_F_SYNC
 * is added when op_is_sync() is true for @rq->cmd_flags, and RNBD_F_FUA when
 * op_is_flush() is true for them.
 *
 * An operation without a mapping triggers a WARN and is encoded with an
 * operation value of 0; the flags are still added.
 *
 * Return: RNBD operation and flag bits, see enum rnbd_io_flags.
 */
static inline u32 rq_to_rnbd_flags(struct request *rq)
{
	u32 wire_opf = 0;

	switch (req_op(rq)) {
	case REQ_OP_WRITE:
		wire_opf = RNBD_OP_WRITE;
		break;
	case REQ_OP_READ:
		wire_opf = RNBD_OP_READ;
		break;
	case REQ_OP_FLUSH:
		wire_opf = RNBD_OP_FLUSH;
		break;
	case REQ_OP_SECURE_ERASE:
		wire_opf = RNBD_OP_SECURE_ERASE;
		break;
	case REQ_OP_DISCARD:
		wire_opf = RNBD_OP_DISCARD;
		break;
	default:
		/* wire_opf keeps its initial value of 0 */
		WARN(1, "Unknown request type %d (flags %llu)\n",
		     (__force u32)req_op(rq),
		     (__force unsigned long long)rq->cmd_flags);
		break;
	}

	wire_opf |= op_is_sync(rq->cmd_flags) ? RNBD_F_SYNC : 0;
	wire_opf |= op_is_flush(rq->cmd_flags) ? RNBD_F_FUA : 0;

	return wire_opf;
}
/*
 * Declaration only; the definition is not part of this header. Presumably
 * returns the textual name of @mode (cf. rnbd_access_modes[]) -- confirm
 * against the definition.
 */
const char *rnbd_access_mode_str(enum rnbd_access_mode mode);
#endif /* RNBD_PROTO_H */