block-6.6-2023-09-08

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmT7NSUQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpuIuEACxQj0JjIVwgp8VlUcMAEFap7GH940qs8is
 tnRrGS48G/z3WZ0zizGQ1hF2m9oOJ1NwZ1bBrGJlVBP8KSgyR9IxI6WVt0JBZgg8
 DNQt+v5FoX4v5ZbFr/NTqz/6YnPWVwyTkJs4JIi9YLmpKkd58N+51n3wl56xyrp7
 8OcjHPH1ZI0qGar2poPXI7EKaweSzJW7nr3/cHujYoCfAGn7eZbHNgY0wdAtWdhM
 oGFABgMKTFKA0rCSaP8uo7GIqov+KXvFUNOuC6eNjtaUvXMecPLEJxvDhhi52fml
 LEH1a9bmtazu6bufrbI2D4SmXLNBcqnQw5d/1pn9oRO/UU6AAi6E7I/L1iodKWav
 bkr2i+YMnhUqVDFRwy+j78bcGRlLPWupvj68RdpvAvzhdW/85p9nH66CJfTnjQ3q
 xJJKoUREd/8C6ktm5xHYyETxcL5q6Vdz1w5GmsCyOWWpsXsEbh0GHn6pRDkLWJf4
 OQhO4CbxIARGE5NRK8MYl/J/581yS1cIdGZAg7hsVHzTGDSt3h/Hlh6ONpWCcvXD
 SJI39W83oBSOnsbRG4FU6Bnt/WHTD0yi07cr867l9HEq63JOK/OduMjXA/tdRfwP
 fZU3RfFYcS8+h9UHgrSgvCk03fCjq1HsWCDfpc6sUuRG1fH6jSNFdkCKqhCoU9Py
 K2IKi1IE0Q==
 =4SLW
 -----END PGP SIGNATURE-----

Merge tag 'block-6.6-2023-09-08' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - Fix null_blk polled IO timeout handling (Chengming)

 - Regression fix for swapped arguments in drbd bvec_set_page()
   (Christoph)

 - String length handling fix for s390 dasd (Heiko)

 - Fixes for blk-throttle accounting (Yu)

 - Fix page pinning issue for same page segments (Christoph)

 - Remove redundant file_remove_privs() call (Christoph)

 - Fix a regression in partition handling for devices not supporting
   partitions (Li)

* tag 'block-6.6-2023-09-08' of git://git.kernel.dk/linux:
  drbd: swap bvec_set_page len and offset
  block: fix pin count management when merging same-page segments
  null_blk: fix poll request timeout handling
  s390/dasd: fix string length handling
  block: don't add or resize partition on the disk with GENHD_FL_NO_PART
  block: remove the call to file_remove_privs in blkdev_write_iter
  blk-throttle: consider 'carryover_ios/bytes' in throtl_trim_slice()
  blk-throttle: use calculate_io/bytes_allowed() for throtl_trim_slice()
  blk-throttle: fix wrong comparation while 'carryover_ios/bytes' is negative
  blk-throttle: print signed value 'carryover_bytes/ios' for user
This commit is contained in:
Linus Torvalds 2023-09-08 21:39:54 -07:00
commit 7402e635ed
10 changed files with 95 additions and 90 deletions

View File

@ -315,12 +315,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
n = bytes;
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
max_sectors, &same_page)) {
if (same_page)
bio_release_page(bio, page);
max_sectors, &same_page))
break;
}
if (same_page)
bio_release_page(bio, page);
bytes -= n;
offs = 0;
}

View File

@ -697,66 +697,6 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
return true;
}
/* Trim the used slices and adjust slice start accordingly */
static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
{
unsigned long nr_slices, time_elapsed, io_trim;
u64 bytes_trim, tmp;
BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
/*
* If bps are unlimited (-1), then time slice don't get
* renewed. Don't try to trim the slice if slice is used. A new
* slice will start when appropriate.
*/
if (throtl_slice_used(tg, rw))
return;
/*
* A bio has been dispatched. Also adjust slice_end. It might happen
* that initially cgroup limit was very low resulting in high
* slice_end, but later limit was bumped up and bio was dispatched
* sooner, then we need to reduce slice_end. A high bogus slice_end
* is bad because it does not allow new slice to start.
*/
throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
time_elapsed = jiffies - tg->slice_start[rw];
nr_slices = time_elapsed / tg->td->throtl_slice;
if (!nr_slices)
return;
tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
do_div(tmp, HZ);
bytes_trim = tmp;
io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
HZ;
if (!bytes_trim && !io_trim)
return;
if (tg->bytes_disp[rw] >= bytes_trim)
tg->bytes_disp[rw] -= bytes_trim;
else
tg->bytes_disp[rw] = 0;
if (tg->io_disp[rw] >= io_trim)
tg->io_disp[rw] -= io_trim;
else
tg->io_disp[rw] = 0;
tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;
throtl_log(&tg->service_queue,
"[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
tg->slice_start[rw], tg->slice_end[rw], jiffies);
}
static unsigned int calculate_io_allowed(u32 iops_limit,
unsigned long jiffy_elapsed)
{
@ -786,6 +726,67 @@ static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
}
/* Trim the used slices and adjust slice start accordingly */
static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
{
unsigned long time_elapsed;
long long bytes_trim;
int io_trim;
BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
/*
* If bps are unlimited (-1), then time slice don't get
* renewed. Don't try to trim the slice if slice is used. A new
* slice will start when appropriate.
*/
if (throtl_slice_used(tg, rw))
return;
/*
* A bio has been dispatched. Also adjust slice_end. It might happen
* that initially cgroup limit was very low resulting in high
* slice_end, but later limit was bumped up and bio was dispatched
* sooner, then we need to reduce slice_end. A high bogus slice_end
* is bad because it does not allow new slice to start.
*/
throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
time_elapsed = rounddown(jiffies - tg->slice_start[rw],
tg->td->throtl_slice);
if (!time_elapsed)
return;
bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
time_elapsed) +
tg->carryover_bytes[rw];
io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed) +
tg->carryover_ios[rw];
if (bytes_trim <= 0 && io_trim <= 0)
return;
tg->carryover_bytes[rw] = 0;
if ((long long)tg->bytes_disp[rw] >= bytes_trim)
tg->bytes_disp[rw] -= bytes_trim;
else
tg->bytes_disp[rw] = 0;
tg->carryover_ios[rw] = 0;
if ((int)tg->io_disp[rw] >= io_trim)
tg->io_disp[rw] -= io_trim;
else
tg->io_disp[rw] = 0;
tg->slice_start[rw] += time_elapsed;
throtl_log(&tg->service_queue,
"[%c] trim slice nr=%lu bytes=%lld io=%d start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', time_elapsed / tg->td->throtl_slice,
bytes_trim, io_trim, tg->slice_start[rw], tg->slice_end[rw],
jiffies);
}
static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
{
unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
@ -816,7 +817,7 @@ static void tg_update_carryover(struct throtl_grp *tg)
__tg_update_carryover(tg, WRITE);
/* see comments in struct throtl_grp for meaning of these fields. */
throtl_log(&tg->service_queue, "%s: %llu %llu %u %u\n", __func__,
throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__,
tg->carryover_bytes[READ], tg->carryover_bytes[WRITE],
tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
}
@ -825,7 +826,7 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
u32 iops_limit)
{
bool rw = bio_data_dir(bio);
unsigned int io_allowed;
int io_allowed;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
if (iops_limit == UINT_MAX) {
@ -838,9 +839,8 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
tg->carryover_ios[rw];
if (tg->io_disp[rw] + 1 <= io_allowed) {
if (io_allowed > 0 && tg->io_disp[rw] + 1 <= io_allowed)
return 0;
}
/* Calc approx time to dispatch */
jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;
@ -851,7 +851,8 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
u64 bps_limit)
{
bool rw = bio_data_dir(bio);
u64 bytes_allowed, extra_bytes;
long long bytes_allowed;
u64 extra_bytes;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
unsigned int bio_size = throtl_bio_data_size(bio);
@ -869,9 +870,8 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
tg->carryover_bytes[rw];
if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
if (bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
return 0;
}
/* Calc approx time to dispatch */
extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;

View File

@ -127,8 +127,8 @@ struct throtl_grp {
* bytes/ios are waited already in previous configuration, and they will
* be used to calculate wait time under new configuration.
*/
uint64_t carryover_bytes[2];
unsigned int carryover_ios[2];
long long carryover_bytes[2];
int carryover_ios[2];
unsigned long last_check_time;

View File

@ -671,10 +671,6 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
iov_iter_truncate(from, size);
}
ret = file_remove_privs(file);
if (ret)
return ret;
ret = file_update_time(file);
if (ret)
return ret;

View File

@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition p;
long long start, length;
if (disk->flags & GENHD_FL_NO_PART)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))

View File

@ -1557,7 +1557,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
do {
int sent;
bvec_set_page(&bvec, page, offset, len);
bvec_set_page(&bvec, page, len, offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
sent = sock_sendmsg(socket, &msg);

View File

@ -1643,9 +1643,12 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
struct nullb_queue *nq = hctx->driver_data;
LIST_HEAD(list);
int nr = 0;
struct request *rq;
spin_lock(&nq->poll_lock);
list_splice_init(&nq->poll_list, &list);
list_for_each_entry(rq, &list, queuelist)
blk_mq_set_request_complete(rq);
spin_unlock(&nq->poll_lock);
while (!list_empty(&list)) {
@ -1671,16 +1674,21 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
pr_info("rq %p timed out\n", rq);
if (hctx->type == HCTX_TYPE_POLL) {
struct nullb_queue *nq = hctx->driver_data;
spin_lock(&nq->poll_lock);
/* The request may have completed meanwhile. */
if (blk_mq_request_completed(rq)) {
spin_unlock(&nq->poll_lock);
return BLK_EH_DONE;
}
list_del_init(&rq->queuelist);
spin_unlock(&nq->poll_lock);
}
pr_info("rq %p timed out\n", rq);
/*
* If the device is marked as blocking (i.e. memory backed or zoned
* device), the submission path may be blocked waiting for resources

View File

@ -1378,16 +1378,12 @@ static ssize_t dasd_vendor_show(struct device *dev,
static DEVICE_ATTR(vendor, 0444, dasd_vendor_show, NULL);
#define UID_STRLEN ( /* vendor */ 3 + 1 + /* serial */ 14 + 1 +\
/* SSID */ 4 + 1 + /* unit addr */ 2 + 1 +\
/* vduit */ 32 + 1)
static ssize_t
dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf)
{
char uid_string[DASD_UID_STRLEN];
struct dasd_device *device;
struct dasd_uid uid;
char uid_string[UID_STRLEN];
char ua_string[3];
device = dasd_device_from_cdev(to_ccwdev(dev));

View File

@ -1079,12 +1079,12 @@ static void dasd_eckd_get_uid_string(struct dasd_conf *conf,
create_uid(conf, &uid);
if (strlen(uid.vduit) > 0)
snprintf(print_uid, sizeof(*print_uid),
snprintf(print_uid, DASD_UID_STRLEN,
"%s.%s.%04x.%02x.%s",
uid.vendor, uid.serial, uid.ssid,
uid.real_unit_addr, uid.vduit);
else
snprintf(print_uid, sizeof(*print_uid),
snprintf(print_uid, DASD_UID_STRLEN,
"%s.%s.%04x.%02x",
uid.vendor, uid.serial, uid.ssid,
uid.real_unit_addr);
@ -1093,8 +1093,8 @@ static void dasd_eckd_get_uid_string(struct dasd_conf *conf,
static int dasd_eckd_check_cabling(struct dasd_device *device,
void *conf_data, __u8 lpm)
{
char print_path_uid[DASD_UID_STRLEN], print_device_uid[DASD_UID_STRLEN];
struct dasd_eckd_private *private = device->private;
char print_path_uid[60], print_device_uid[60];
struct dasd_conf path_conf;
path_conf.data = conf_data;
@ -1293,9 +1293,9 @@ static void dasd_eckd_path_available_action(struct dasd_device *device,
__u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE];
__u8 lpm, opm, npm, ppm, epm, hpfpm, cablepm;
struct dasd_conf_data *conf_data;
char print_uid[DASD_UID_STRLEN];
struct dasd_conf path_conf;
unsigned long flags;
char print_uid[60];
int rc, pos;
opm = 0;
@ -5855,8 +5855,8 @@ static void dasd_eckd_dump_sense(struct dasd_device *device,
static int dasd_eckd_reload_device(struct dasd_device *device)
{
struct dasd_eckd_private *private = device->private;
char print_uid[DASD_UID_STRLEN];
int rc, old_base;
char print_uid[60];
struct dasd_uid uid;
unsigned long flags;

View File

@ -259,6 +259,10 @@ struct dasd_uid {
char vduit[33];
};
#define DASD_UID_STRLEN ( /* vendor */ 3 + 1 + /* serial */ 14 + 1 + \
/* SSID */ 4 + 1 + /* unit addr */ 2 + 1 + \
/* vduit */ 32 + 1)
/*
* PPRC Status data
*/