Merge tag 'md/4.4' of git://neil.brown.name/md
Pull md updates from Neil Brown:
"Two major components to this update.
1) The clustered-raid1 support from SUSE is nearly complete. There
are a few outstanding issues being worked on. Maybe half a dozen
patches will bring this to a usable state.
2) The first stage of journalled-raid5 support from Facebook makes an
appearance. With a journal device configured (typically NVRAM or
SSD), the "RAID5 write hole" should be closed - a crash during
degraded operations cannot result in data corruption.
The next stage will be to use the journal as a write-behind cache
so that latency can be reduced and in some cases throughput
increased by performing more full-stripe writes.
* tag 'md/4.4' of git://neil.brown.name/md: (66 commits)
MD: when RAID journal is missing/faulty, block RESTART_ARRAY_RW
MD: set journal disk ->raid_disk
MD: kick out journal disk if it's not fresh
raid5-cache: start raid5 readonly if journal is missing
MD: add new bit to indicate raid array with journal
raid5-cache: IO error handling
raid5: journal disk can't be removed
raid5-cache: add trim support for log
MD: fix info output for journal disk
raid5-cache: use bio chaining
raid5-cache: small log->seq cleanup
raid5-cache: new helper: r5_reserve_log_entry
raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta
raid5-cache: take rdev->data_offset into account early on
raid5-cache: refactor bio allocation
raid5-cache: clean up r5l_get_meta
raid5-cache: simplify state machine when caches flushes are not needed
raid5-cache: factor out a helper to run all stripes for an I/O unit
raid5-cache: rename flushed_ios to finished_ios
raid5-cache: free I/O units earlier
...
This commit is contained in:
@@ -89,6 +89,12 @@
|
||||
* read requests will only be sent here in
|
||||
* dire need
|
||||
*/
|
||||
#define MD_DISK_JOURNAL 18 /* disk is used as the write journal in RAID-5/6 */
|
||||
|
||||
/*
 * Device role values. SPARE, FAULTY and JOURNAL are special markers; all
 * three are numerically greater than MD_DISK_ROLE_MAX, which bounds the
 * values a regular disk role may take.
 */
#define MD_DISK_ROLE_SPARE 0xffff
|
||||
#define MD_DISK_ROLE_FAULTY 0xfffe
|
||||
#define MD_DISK_ROLE_JOURNAL 0xfffd
|
||||
#define MD_DISK_ROLE_MAX 0xff00 /* max value of regular disk role */
|
||||
|
||||
typedef struct mdp_device_descriptor_s {
|
||||
__u32 number; /* 0 Device number in the entire set */
|
||||
@@ -252,7 +258,10 @@ struct mdp_superblock_1 {
|
||||
__le64 data_offset; /* sector start of data, often 0 */
|
||||
__le64 data_size; /* sectors in this device that can be used for data */
|
||||
__le64 super_offset; /* sector start of this superblock */
|
||||
__le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
|
||||
union {
|
||||
__le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
|
||||
__le64 journal_tail;/* journal tail of journal device (from data_offset) */
|
||||
};
|
||||
__le32 dev_number; /* permanent identifier of this device - not role in raid */
|
||||
__le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
|
||||
__u8 device_uuid[16]; /* user-space settable, ignored by kernel */
|
||||
@@ -302,6 +311,8 @@ struct mdp_superblock_1 {
|
||||
#define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening
|
||||
* is guided by bitmap.
|
||||
*/
|
||||
#define MD_FEATURE_CLUSTERED 256 /* clustered MD */
|
||||
#define MD_FEATURE_JOURNAL 512 /* support write cache */
|
||||
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|
||||
|MD_FEATURE_RECOVERY_OFFSET \
|
||||
|MD_FEATURE_RESHAPE_ACTIVE \
|
||||
@@ -310,6 +321,66 @@ struct mdp_superblock_1 {
|
||||
|MD_FEATURE_RESHAPE_BACKWARDS \
|
||||
|MD_FEATURE_NEW_OFFSET \
|
||||
|MD_FEATURE_RECOVERY_BITMAP \
|
||||
|MD_FEATURE_CLUSTERED \
|
||||
|MD_FEATURE_JOURNAL \
|
||||
)
|
||||
|
||||
/*
 * Common header for log payload records. It is the first member of
 * struct r5l_payload_data_parity and struct r5l_payload_flush, and the
 * element type of r5l_meta_block.payloads[]. Packed; both fields are
 * little-endian 16-bit values.
 */
struct r5l_payload_header {
|
||||
/* presumably one of enum r5l_payload_type - confirm against the log reader */
__le16 type;
|
||||
/* presumably the per-type flag enums defined in this file - confirm */
__le16 flags;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
 * Kinds of log payload. The values are spelled out explicitly (0, 1, 2).
 * Presumably stored in r5l_payload_header.type, in which case they are part
 * of the on-disk format and must not be renumbered - TODO confirm.
 */
enum r5l_payload_type {
|
||||
R5LOG_PAYLOAD_DATA = 0,
|
||||
R5LOG_PAYLOAD_PARITY = 1,
|
||||
R5LOG_PAYLOAD_FLUSH = 2,
|
||||
};
|
||||
|
||||
/*
 * Payload record describing a run of data or parity: the common payload
 * header, a size and a location (both in sectors), followed by a flexible
 * array of little-endian 32-bit checksums. Packed.
 *
 * NOTE(review): the size comment says each 4k has a checksum, so checksum[]
 * presumably holds one entry per 4k covered by size - confirm against the
 * writer.
 */
struct r5l_payload_data_parity {
|
||||
struct r5l_payload_header header;
|
||||
__le32 size; /* sector. data/parity size. each 4k
|
||||
* has a checksum */
|
||||
__le64 location; /* sector. For data, it's raid sector. For
|
||||
* parity, it's stripe sector */
|
||||
/* flexible array member: its length is not stored in this struct */
__le32 checksum[];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
 * Flag values for data/parity payloads (presumably carried in
 * r5l_payload_header.flags - confirm).
 *
 * NOTE(review): the values are 1, 2 and 3, not distinct bits, so
 * R5LOG_PAYLOAD_FLAG_RESHAPING (3) is numerically equal to
 * DISCARD | RESHAPED. Confirm how consumers test these before adding any
 * bitwise check; do not renumber without checking on-disk compatibility.
 */
enum r5l_payload_data_parity_flag {
|
||||
R5LOG_PAYLOAD_FLAG_DISCARD = 1, /* payload is discard */
|
||||
/*
|
||||
* RESHAPED/RESHAPING is only set when there is reshape activity. Note,
|
||||
* both data/parity of a stripe should have the same flag set
|
||||
*
|
||||
* RESHAPED: reshape is running, and this stripe finished reshape
|
||||
* RESHAPING: reshape is running, and this stripe isn't reshaped
|
||||
*/
|
||||
R5LOG_PAYLOAD_FLAG_RESHAPED = 2,
|
||||
R5LOG_PAYLOAD_FLAG_RESHAPING = 3,
|
||||
};
|
||||
|
||||
/*
 * Flush payload record: the common payload header, the size of the
 * flush_stripes[] array in bytes, then a flexible array of little-endian
 * 64-bit entries. Packed.
 *
 * NOTE(review): what each flush_stripes[] entry identifies (presumably a
 * stripe sector) is not shown here - confirm against the writer.
 */
struct r5l_payload_flush {
|
||||
struct r5l_payload_header header;
|
||||
__le32 size; /* flush_stripes size, bytes */
|
||||
__le64 flush_stripes[];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
 * Flag values for flush payloads (presumably carried in the header.flags of
 * a struct r5l_payload_flush - confirm).
 */
enum r5l_payload_flush_flag {
|
||||
R5LOG_PAYLOAD_FLAG_FLUSH_STRIPE = 1, /* data represents whole stripe */
|
||||
};
|
||||
|
||||
/*
 * Metadata block of the raid5 log: a fixed header followed by a flexible
 * array of payload headers. Packed, with little-endian multi-byte fields.
 *
 * If the __u8/__le16/__le32/__le64 types have their nominal widths, the two
 * padding members place meta_size at byte offset 12 and the fixed part is
 * 32 bytes, with payloads[] starting immediately after position.
 */
struct r5l_meta_block {
|
||||
/* presumably R5LOG_MAGIC - confirm against the writer */
__le32 magic;
|
||||
/* NOTE(review): algorithm and covered range are not shown here */
__le32 checksum;
|
||||
/* presumably R5LOG_VERSION - confirm against the writer */
__u8 version;
|
||||
/* padding; the names suggest these are written as zero - confirm */
__u8 __zero_pading_1;
|
||||
__le16 __zero_pading_2;
|
||||
__le32 meta_size; /* whole size of the block */
|
||||
|
||||
__le64 seq;
|
||||
__le64 position; /* sector, start from rdev->data_offset, current position */
|
||||
/* variable-length payload area; presumably bounded by meta_size - confirm */
struct r5l_payload_header payloads[];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
 * Log format constants. Presumably the expected values of
 * r5l_meta_block.version and r5l_meta_block.magic respectively - confirm
 * against the code that writes and validates meta blocks.
 */
#define R5LOG_VERSION 0x1
|
||||
#define R5LOG_MAGIC 0x6433c509
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user