From 5d87a052c7e5f245bbb3018721b4b0afe0afc252 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 20 Feb 2008 09:01:22 +0100 Subject: [PATCH 01/17] block: fix kernel-docbook parameters and files kernel-doc for block/: - add missing parameters - fix one function's parameter list (remove blank line) - add 2 source files to docbook for non-exported kernel-doc functions Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- Documentation/DocBook/kernel-api.tmpl | 2 ++ block/blk-core.c | 2 ++ block/blk-settings.c | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index f31601e8bd89..dc0f30c3e571 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -361,12 +361,14 @@ X!Edrivers/pnp/system.c Block Devices !Eblock/blk-core.c +!Iblock/blk-core.c !Eblock/blk-map.c !Iblock/blk-sysfs.c !Eblock/blk-settings.c !Eblock/blk-exec.c !Eblock/blk-barrier.c !Eblock/blk-tag.c +!Iblock/blk-tag.c diff --git a/block/blk-core.c b/block/blk-core.c index 775c8516abf5..2d7e3a2f56c4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1768,6 +1768,7 @@ static inline void __end_request(struct request *rq, int uptodate, /** * blk_rq_bytes - Returns bytes left to complete in the entire request + * @rq: the request being processed **/ unsigned int blk_rq_bytes(struct request *rq) { @@ -1780,6 +1781,7 @@ EXPORT_SYMBOL_GPL(blk_rq_bytes); /** * blk_rq_cur_bytes - Returns bytes left to complete in the current segment + * @rq: the request being processed **/ unsigned int blk_rq_cur_bytes(struct request *rq) { diff --git a/block/blk-settings.c b/block/blk-settings.c index 9a8ffdd0ce3d..da923fed1f2c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -294,7 +294,6 @@ EXPORT_SYMBOL(blk_queue_stack_limits); /** * blk_queue_dma_drain - Set up a drain buffer for excess dma. - * * @q: the request queue for the device * @dma_drain_needed: fn which returns non-zero if drain is necessary * @buf: physically contiguous buffer From 02cf01aea5af7a4d1a38045712fe11bffcc206b0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Feb 2008 10:34:51 +0100 Subject: [PATCH 02/17] splice: only return -EAGAIN if there's hope of more data sys_tee() currently is a bit eager in returning -EAGAIN, it may do so even if we don't have a chance of anymore data becoming available. So improve the logic and only return -EAGAIN if we have an attached writer to the input pipe. Reported by Johann Felix Soden and Patrick McManus . Tested-by: Johann Felix Soden Signed-off-by: Jens Axboe --- fs/splice.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 9b559ee711a8..0670c915cd35 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1669,6 +1669,13 @@ static int link_pipe(struct pipe_inode_info *ipipe, i++; } while (len); + /* + * return EAGAIN if we have the potential of some data in the + * future, otherwise just return 0 + */ + if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) + ret = -EAGAIN; + inode_double_unlock(ipipe->inode, opipe->inode); /* @@ -1709,11 +1716,8 @@ static long do_tee(struct file *in, struct file *out, size_t len, ret = link_ipipe_prep(ipipe, flags); if (!ret) { ret = link_opipe_prep(opipe, flags); - if (!ret) { + if (!ret) ret = link_pipe(ipipe, opipe, len, flags); - if (!ret && (flags & SPLICE_F_NONBLOCK)) - ret = -EAGAIN; - } } } From 89b6e743788516491846724d7ef89bcac7ac9c99 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Thu, 21 Feb 2008 08:54:03 +0100 Subject: [PATCH 03/17] resubmit: cciss: procfs updates to display info about many volumes This patch allows us to display information about all of the logical volumes configured on a particular controller without stepping on memory even when there are many volumes (128 or more) configured. Please consider this for inclusion. Signed-off-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 257 +++++++++++++++++++++++-------------- drivers/block/cciss_scsi.c | 10 +- 2 files changed, 162 insertions(+), 105 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 9715be3f2487..f1e7390683fd 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -174,8 +175,6 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size, static void fail_all_cmds(unsigned long ctlr); #ifdef CONFIG_PROC_FS -static int cciss_proc_get_info(char *buffer, char **start, off_t offset, - int length, int *eof, void *data); static void cciss_procinit(int i); #else static void cciss_procinit(int i) @@ -240,24 +239,46 @@ static inline CommandList_struct *removeQ(CommandList_struct **Qptr, */ #define ENG_GIG 1000000000 #define ENG_GIG_FACTOR (ENG_GIG/512) +#define ENGAGE_SCSI "engage scsi" static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", "UNKNOWN" }; static struct proc_dir_entry *proc_cciss; -static int cciss_proc_get_info(char *buffer, char **start, off_t offset, - int length, int *eof, void *data) +static void cciss_seq_show_header(struct seq_file *seq) { - off_t pos = 0; - off_t len = 0; - int size, i, ctlr; - ctlr_info_t *h = (ctlr_info_t *) data; - drive_info_struct *drv; - unsigned long flags; - sector_t vol_sz, vol_sz_frac; + ctlr_info_t *h = seq->private; - ctlr = h->ctlr; + seq_printf(seq, "%s: HP %s Controller\n" + "Board ID: 0x%08lx\n" + "Firmware Version: %c%c%c%c\n" + "IRQ: %d\n" + "Logical drives: %d\n" + "Current Q depth: %d\n" + "Current # commands on controller: %d\n" + "Max Q depth since init: %d\n" + "Max # commands on controller since init: %d\n" + "Max SG entries since init: %d\n", + h->devname, + h->product_name, + (unsigned long)h->board_id, + h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], + h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT], + h->num_luns, + h->Qdepth, h->commands_outstanding, + h->maxQsinceinit, h->max_outstanding, h->maxSG); + +#ifdef CONFIG_CISS_SCSI_TAPE + cciss_seq_tape_report(seq, h->ctlr); +#endif /* CONFIG_CISS_SCSI_TAPE */ +} + +static void *cciss_seq_start(struct seq_file *seq, loff_t *pos) +{ + ctlr_info_t *h = seq->private; + unsigned ctlr = h->ctlr; + unsigned long flags; /* prevent displaying bogus info during configuration * or deconfiguration of a logical volume @@ -265,115 +286,155 @@ static int cciss_proc_get_info(char *buffer, char **start, off_t offset, spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (h->busy_configuring) { spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - return -EBUSY; + return ERR_PTR(-EBUSY); } h->busy_configuring = 1; spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - size = sprintf(buffer, "%s: HP %s Controller\n" - "Board ID: 0x%08lx\n" - "Firmware Version: %c%c%c%c\n" - "IRQ: %d\n" - "Logical drives: %d\n" - "Max sectors: %d\n" - "Current Q depth: %d\n" - "Current # commands on controller: %d\n" - "Max Q depth since init: %d\n" - "Max # commands on controller since init: %d\n" - "Max SG entries since init: %d\n\n", - h->devname, - h->product_name, - (unsigned long)h->board_id, - h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], - h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT], - h->num_luns, - h->cciss_max_sectors, - h->Qdepth, h->commands_outstanding, - h->maxQsinceinit, h->max_outstanding, h->maxSG); + if (*pos == 0) + cciss_seq_show_header(seq); - pos += size; - len += size; - cciss_proc_tape_report(ctlr, buffer, &pos, &len); - for (i = 0; i <= h->highest_lun; i++) { - - drv = &h->drv[i]; - if (drv->heads == 0) - continue; - - vol_sz = drv->nr_blocks; - vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR); - vol_sz_frac *= 100; - sector_div(vol_sz_frac, ENG_GIG_FACTOR); - - if (drv->raid_level > 5) - drv->raid_level = RAID_UNKNOWN; - size = sprintf(buffer + len, "cciss/c%dd%d:" - "\t%4u.%02uGB\tRAID %s\n", - ctlr, i, (int)vol_sz, (int)vol_sz_frac, - raid_label[drv->raid_level]); - pos += size; - len += size; - } - - *eof = 1; - *start = buffer + offset; - len -= offset; - if (len > length) - len = length; - h->busy_configuring = 0; - return len; + return pos; } -static int -cciss_proc_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static int cciss_seq_show(struct seq_file *seq, void *v) { - unsigned char cmd[80]; - int len; -#ifdef CONFIG_CISS_SCSI_TAPE - ctlr_info_t *h = (ctlr_info_t *) data; - int rc; + sector_t vol_sz, vol_sz_frac; + ctlr_info_t *h = seq->private; + unsigned ctlr = h->ctlr; + loff_t *pos = v; + drive_info_struct *drv = &h->drv[*pos]; + + if (*pos > h->highest_lun) + return 0; + + if (drv->heads == 0) + return 0; + + vol_sz = drv->nr_blocks; + vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR); + vol_sz_frac *= 100; + sector_div(vol_sz_frac, ENG_GIG_FACTOR); + + if (drv->raid_level > 5) + drv->raid_level = RAID_UNKNOWN; + seq_printf(seq, "cciss/c%dd%d:" + "\t%4u.%02uGB\tRAID %s\n", + ctlr, (int) *pos, (int)vol_sz, (int)vol_sz_frac, + raid_label[drv->raid_level]); + return 0; +} + +static void *cciss_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ctlr_info_t *h = seq->private; + + if (*pos > h->highest_lun) + return NULL; + *pos += 1; + + return pos; +} + +static void cciss_seq_stop(struct seq_file *seq, void *v) +{ + ctlr_info_t *h = seq->private; + + /* Only reset h->busy_configuring if we succeeded in setting + * it during cciss_seq_start. */ + if (v == ERR_PTR(-EBUSY)) + return; + + h->busy_configuring = 0; +} + +static struct seq_operations cciss_seq_ops = { + .start = cciss_seq_start, + .show = cciss_seq_show, + .next = cciss_seq_next, + .stop = cciss_seq_stop, +}; + +static int cciss_seq_open(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &cciss_seq_ops); + struct seq_file *seq = file->private_data; + + if (!ret) + seq->private = PDE(inode)->data; + + return ret; +} + +static ssize_t +cciss_proc_write(struct file *file, const char __user *buf, + size_t length, loff_t *ppos) +{ + int err; + char *buffer; + +#ifndef CONFIG_CISS_SCSI_TAPE + return -EINVAL; #endif - if (count > sizeof(cmd) - 1) + if (!buf || length > PAGE_SIZE - 1) return -EINVAL; - if (copy_from_user(cmd, buffer, count)) - return -EFAULT; - cmd[count] = '\0'; - len = strlen(cmd); // above 3 lines ensure safety - if (len && cmd[len - 1] == '\n') - cmd[--len] = '\0'; -# ifdef CONFIG_CISS_SCSI_TAPE - if (strcmp("engage scsi", cmd) == 0) { + + buffer = (char *)__get_free_page(GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + err = -EFAULT; + if (copy_from_user(buffer, buf, length)) + goto out; + buffer[length] = '\0'; + +#ifdef CONFIG_CISS_SCSI_TAPE + if (strncmp(ENGAGE_SCSI, buffer, sizeof ENGAGE_SCSI - 1) == 0) { + struct seq_file *seq = file->private_data; + ctlr_info_t *h = seq->private; + int rc; + rc = cciss_engage_scsi(h->ctlr); if (rc != 0) - return -rc; - return count; - } + err = -rc; + else + err = length; + } else +#endif /* CONFIG_CISS_SCSI_TAPE */ + err = -EINVAL; /* might be nice to have "disengage" too, but it's not safely possible. (only 1 module use count, lock issues.) */ -# endif - return -EINVAL; + +out: + free_page((unsigned long)buffer); + return err; } -/* - * Get us a file in /proc/cciss that says something about each controller. - * Create /proc/cciss if it doesn't exist yet. - */ +static struct file_operations cciss_proc_fops = { + .owner = THIS_MODULE, + .open = cciss_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = cciss_proc_write, +}; + static void __devinit cciss_procinit(int i) { struct proc_dir_entry *pde; - if (proc_cciss == NULL) { + if (proc_cciss == NULL) proc_cciss = proc_mkdir("cciss", proc_root_driver); - if (!proc_cciss) - return; - } + if (!proc_cciss) + return; + pde = proc_create(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP | + S_IROTH, proc_cciss, + &cciss_proc_fops); + if (!pde) + return; - pde = create_proc_read_entry(hba[i]->devname, - S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH, - proc_cciss, cciss_proc_get_info, hba[i]); - pde->write_proc = cciss_proc_write; + pde->data = hba[i]; } #endif /* CONFIG_PROC_FS */ diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 55178e9973a0..45ac09300eb3 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -1404,21 +1404,18 @@ cciss_engage_scsi(int ctlr) } static void -cciss_proc_tape_report(int ctlr, unsigned char *buffer, off_t *pos, off_t *len) +cciss_seq_tape_report(struct seq_file *seq, int ctlr) { unsigned long flags; - int size; - - *pos = *pos -1; *len = *len - 1; // cut off the last trailing newline CPQ_TAPE_LOCK(ctlr, flags); - size = sprintf(buffer + *len, + seq_printf(seq, "Sequential access devices: %d\n\n", ccissscsi[ctlr].ndevices); CPQ_TAPE_UNLOCK(ctlr, flags); - *pos += size; *len += size; } + /* Need at least one of these error handlers to keep ../scsi/hosts.c from * complaining. Doing a host- or bus-reset can't do anything good here. * Despite what it might say in scsi_error.c, there may well be commands @@ -1498,6 +1495,5 @@ static int cciss_eh_abort_handler(struct scsi_cmnd *scsicmd) #define cciss_scsi_setup(cntl_num) #define cciss_unregister_scsi(ctlr) #define cciss_register_scsi(ctlr) -#define cciss_proc_tape_report(ctlr, buffer, pos, len) #endif /* CONFIG_CISS_SCSI_TAPE */ From 7a85f8896f4b4a4a0249563b92af9e3161a6b467 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Mar 2008 11:17:11 +0100 Subject: [PATCH 04/17] block: restore the meaning of rq->data_len to the true data length The meaning of rq->data_len was changed to the length of an allocated buffer from the true data length. It breaks SG_IO friends and bsg. This patch restores the meaning of rq->data_len to the true data length and adds rq->extra_len to store an extended length (due to drain buffer and padding). This patch also removes the code to update bio in blk_rq_map_user introduced by the commit 40b01b9bbdf51ae543a04744283bf2d56c4a6afa. The commit adjusts bio according to memory alignment (queue_dma_alignment). However, memory alignment is NOT padding alignment. This adjustment also breaks SG_IO friends and bsg. Padding alignment needs to be fixed in a proper way (by a separate patch). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- block/blk-map.c | 6 +----- block/blk-merge.c | 2 +- block/bsg.c | 8 ++++---- block/scsi_ioctl.c | 4 ++-- drivers/ata/libata-scsi.c | 6 +++--- include/linux/blkdev.h | 2 +- 7 files changed, 13 insertions(+), 18 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 2d7e3a2f56c4..a248cf1c98dd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -127,7 +127,6 @@ void rq_init(struct request_queue *q, struct request *rq) rq->nr_hw_segments = 0; rq->ioprio = 0; rq->special = NULL; - rq->raw_data_len = 0; rq->buffer = NULL; rq->tag = -1; rq->errors = 0; @@ -135,6 +134,7 @@ void rq_init(struct request_queue *q, struct request *rq) rq->cmd_len = 0; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->data_len = 0; + rq->extra_len = 0; rq->sense_len = 0; rq->data = NULL; rq->sense = NULL; @@ -2018,7 +2018,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->hard_cur_sectors = rq->current_nr_sectors; rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); rq->buffer = bio_data(bio); - rq->raw_data_len = bio->bi_size; rq->data_len = bio->bi_size; rq->bio = rq->biotail = bio; diff --git a/block/blk-map.c b/block/blk-map.c index 09f7fd0bcb73..f5598322954d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -19,7 +19,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, rq->biotail->bi_next = bio; rq->biotail = bio; - rq->raw_data_len += bio->bi_size; rq->data_len += bio->bi_size; } return 0; @@ -151,11 +150,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, */ if (len & queue_dma_alignment(q)) { unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1; - struct bio *bio = rq->biotail; - bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len; - bio->bi_size += pad_len; - rq->data_len += pad_len; + rq->extra_len += pad_len; } rq->buffer = rq->data = NULL; diff --git a/block/blk-merge.c b/block/blk-merge.c index 7506c4fe0264..0f58616bcd7f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -231,7 +231,7 @@ new_segment: ((unsigned long)q->dma_drain_buffer) & (PAGE_SIZE - 1)); nsegs++; - rq->data_len += q->dma_drain_size; + rq->extra_len += q->dma_drain_size; } if (sg) diff --git a/block/bsg.c b/block/bsg.c index 7f3c09549e4b..8917c5174dc2 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -437,14 +437,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, } if (rq->next_rq) { - hdr->dout_resid = rq->raw_data_len; - hdr->din_resid = rq->next_rq->raw_data_len; + hdr->dout_resid = rq->data_len; + hdr->din_resid = rq->next_rq->data_len; blk_rq_unmap_user(bidi_bio); blk_put_request(rq->next_rq); } else if (rq_data_dir(rq) == READ) - hdr->din_resid = rq->raw_data_len; + hdr->din_resid = rq->data_len; else - hdr->dout_resid = rq->raw_data_len; + hdr->dout_resid = rq->data_len; /* * If the request generated a negative error number, return it diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index e993cac4911d..a2c3a936ebf9 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -266,7 +266,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, hdr->info = 0; if (hdr->masked_status || hdr->host_status || hdr->driver_status) hdr->info |= SG_INFO_CHECK; - hdr->resid = rq->raw_data_len; + hdr->resid = rq->data_len; hdr->sb_len_wr = 0; if (rq->sense_len && hdr->sbp) { @@ -528,8 +528,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq = blk_get_request(q, WRITE, __GFP_WAIT); rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->data = NULL; - rq->raw_data_len = 0; rq->data_len = 0; + rq->extra_len = 0; rq->timeout = BLK_DEFAULT_SG_TIMEOUT; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = cmd; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7b1f1ee8131d..fe47922dd69e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2538,7 +2538,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) } qc->tf.command = ATA_CMD_PACKET; - qc->nbytes = scsi_bufflen(scmd); + qc->nbytes = scsi_bufflen(scmd) + scmd->request->extra_len; /* check whether ATAPI DMA is safe */ if (!using_pio && ata_check_atapi_dma(qc)) @@ -2549,7 +2549,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) * want to set it properly, and for DMA where it is * effectively meaningless. */ - nbytes = min(scmd->request->raw_data_len, (unsigned int)63 * 1024); + nbytes = min(scmd->request->data_len, (unsigned int)63 * 1024); /* Most ATAPI devices which honor transfer chunk size don't * behave according to the spec when odd chunk size which @@ -2875,7 +2875,7 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) * TODO: find out if we need to do more here to * cover scatter/gather case. */ - qc->nbytes = scsi_bufflen(scmd); + qc->nbytes = scsi_bufflen(scmd) + scmd->request->extra_len; /* request result TF and be quiet about device error */ qc->flags |= ATA_QCFLAG_RESULT_TF | ATA_QCFLAG_QUIET; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6fe67d1939c2..b72526c13ca0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -216,8 +216,8 @@ struct request { unsigned int cmd_len; unsigned char cmd[BLK_MAX_CDB]; - unsigned int raw_data_len; unsigned int data_len; + unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; void *data; void *sense; From e3790c7d42a545e8fe8b38b513613ca96687b670 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Mar 2008 11:18:17 +0100 Subject: [PATCH 05/17] block: separate out padding from alignment Block layer alignment was used for two different purposes - memory alignment and padding. This causes problems in lower layers because drivers which only require memory alignment ends up with adjusted rq->data_len. Separate out padding such that padding occurs iff driver explicitly requests it. Tomo: restorethe code to update bio in blk_rq_map_user introduced by the commit 40b01b9bbdf51ae543a04744283bf2d56c4a6afa according to padding alignment. Signed-off-by: Tejun Heo Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-map.c | 20 +++++++++++++------- block/blk-settings.c | 17 +++++++++++++++++ drivers/ata/libata-scsi.c | 3 ++- include/linux/blkdev.h | 2 ++ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index f5598322954d..4e17dfd0035d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -43,6 +43,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, void __user *ubuf, unsigned int len) { unsigned long uaddr; + unsigned int alignment; struct bio *bio, *orig_bio; int reading, ret; @@ -53,8 +54,8 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * direct dma. else, set up kernel bounce buffers */ uaddr = (unsigned long) ubuf; - if (!(uaddr & queue_dma_alignment(q)) && - !(len & queue_dma_alignment(q))) + alignment = queue_dma_alignment(q) | q->dma_pad_mask; + if (!(uaddr & alignment) && !(len & alignment)) bio = bio_map_user(q, NULL, uaddr, len, reading); else bio = bio_copy_user(q, uaddr, len, reading); @@ -141,15 +142,20 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, /* * __blk_rq_map_user() copies the buffers if starting address - * or length isn't aligned. As the copied buffer is always - * page aligned, we know that there's enough room for padding. - * Extend the last bio and update rq->data_len accordingly. + * or length isn't aligned to dma_pad_mask. As the copied + * buffer is always page aligned, we know that there's enough + * room for padding. Extend the last bio and update + * rq->data_len accordingly. * * On unmap, bio_uncopy_user() will use unmodified * bio_map_data pointed to by bio->bi_private. */ - if (len & queue_dma_alignment(q)) { - unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1; + if (len & q->dma_pad_mask) { + unsigned int pad_len = (q->dma_pad_mask & ~len) + 1; + struct bio *bio = rq->biotail; + + bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len; + bio->bi_size += pad_len; rq->extra_len += pad_len; } diff --git a/block/blk-settings.c b/block/blk-settings.c index da923fed1f2c..a9f37f530b15 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -292,6 +292,23 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) } EXPORT_SYMBOL(blk_queue_stack_limits); +/** + * blk_queue_dma_pad - set pad mask + * @q: the request queue for the device + * @mask: pad mask + * + * Set pad mask. Direct IO requests are padded to the mask specified. + * + * Appending pad buffer to a request modifies ->data_len such that it + * includes the pad buffer. The original requested data length can be + * obtained using blk_rq_raw_data_len(). + **/ +void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) +{ + q->dma_pad_mask = mask; +} +EXPORT_SYMBOL(blk_queue_dma_pad); + /** * blk_queue_dma_drain - Set up a drain buffer for excess dma. * @q: the request queue for the device diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index fe47922dd69e..8f0e8f2bc628 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -862,9 +862,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, struct request_queue *q = sdev->request_queue; void *buf; - /* set the min alignment */ + /* set the min alignment and padding */ blk_queue_update_dma_alignment(sdev->request_queue, ATA_DMA_PAD_SZ - 1); + blk_queue_dma_pad(sdev->request_queue, ATA_DMA_PAD_SZ - 1); /* configure draining */ buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b72526c13ca0..6f79d40dd3c0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -362,6 +362,7 @@ struct request_queue unsigned long seg_boundary_mask; void *dma_drain_buffer; unsigned int dma_drain_size; + unsigned int dma_pad_mask; unsigned int dma_alignment; struct blk_queue_tag *queue_tags; @@ -701,6 +702,7 @@ extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); +extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); From 419c434c35614609fd0c79d335c134bf4b88b30b Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 4 Mar 2008 11:20:51 +0100 Subject: [PATCH 06/17] Fix DMA access of block device in 64-bit kernel on some non-x86 systems with 4GB or upper 4GB memory For some non-x86 systems with 4GB or upper 4GB memory, we need increase the range of addresses that can be used for direct DMA in 64-bit kernel. Signed-off-by: Yang Shi Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index a9f37f530b15..18fab5193d81 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -140,7 +140,7 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) /* Assume anything <= 4GB can be handled by IOMMU. Actually some IOMMUs can handle everything, but I don't know of a way to test this here. */ - if (b_pfn < (min_t(u64, 0xffffffff, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) + if (b_pfn <= (min_t(u64, 0xffffffff, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) dma = 1; q->bounce_pfn = max_low_pfn; #else From 278caf0120a77e4398762357a8cc522d094fe2f2 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:44 +0100 Subject: [PATCH 07/17] block/blk-tag.c should #include "blk.h" Every file should include the headers containing the externs for its global functions (in this case for __blk_queue_free_tags()). Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/blk-tag.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-tag.c b/block/blk-tag.c index a8c37d4bbb32..4780a46ce234 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -6,6 +6,8 @@ #include #include +#include "blk.h" + /** * blk_queue_find_tag - find a request by its tag and queue * @q: The request queue for the device From ff88972c850ced92b9c4c7f538d85829c741eeb0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:45 +0100 Subject: [PATCH 08/17] proper prototype for blk_dev_init() This patch adds a proper prototye for blk_dev_init() in block/blk.h Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/blk.h | 2 ++ block/genhd.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk.h b/block/blk.h index ec898dd0c65c..ec9120fb789a 100644 --- a/block/blk.h +++ b/block/blk.h @@ -32,6 +32,8 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect); void blk_queue_congestion_threshold(struct request_queue *q); +int blk_dev_init(void); + /* * Return the threshold (number of used requests) at which the queue is * considered to be congested. It include a little hysteresis to keep the diff --git a/block/genhd.c b/block/genhd.c index 53f2238e69c8..abc6feddc8c6 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -17,6 +17,8 @@ #include #include +#include "blk.h" + static DEFINE_MUTEX(block_class_lock); #ifndef CONFIG_SYSFS_DEPRECATED struct kobject *block_depr; @@ -346,8 +348,6 @@ const struct seq_operations partitions_op = { #endif -extern int blk_dev_init(void); - static struct kobject *base_probe(dev_t devt, int *part, void *data) { if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) From 1826eadfc42839af7c1c5a1859510aff635d3fa1 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:46 +0100 Subject: [PATCH 09/17] block/genhd.c: cleanups This patch contains the following cleanups: - make the needlessly global struct disk_type static - #if 0 the unused genhd_media_change_notify() Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/genhd.c | 6 +++++- include/linux/genhd.h | 2 -- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index abc6feddc8c6..c44527d16c52 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -24,6 +24,8 @@ static DEFINE_MUTEX(block_class_lock); struct kobject *block_depr; #endif +static struct device_type disk_type; + /* * Can be deleted altogether. Later. * @@ -502,7 +504,7 @@ struct class block_class = { .name = "block", }; -struct device_type disk_type = { +static struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, @@ -632,12 +634,14 @@ static void media_change_notify_thread(struct work_struct *work) put_device(gd->driverfs_dev); } +#if 0 void genhd_media_change_notify(struct gendisk *disk) { get_device(disk->driverfs_dev); schedule_work(&disk->async_notify); } EXPORT_SYMBOL_GPL(genhd_media_change_notify); +#endif /* 0 */ dev_t blk_lookup_devt(const char *name) { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 09a3b18918c7..cd048e3cc96d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -18,7 +18,6 @@ #define dev_to_disk(device) container_of(device, struct gendisk, dev) #define dev_to_part(device) container_of(device, struct hd_struct, dev) -extern struct device_type disk_type; extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; @@ -556,7 +555,6 @@ extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); -extern void genhd_media_change_notify(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, struct module *module, struct kobject *(*probe)(dev_t, int *, void *), From 9d7f1e6b9b2c2e4fe029ff35f4ca1e2879864208 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:47 +0100 Subject: [PATCH 10/17] unexport blk_{get,put}_queue This patch removes the unused exports of blk_{get,put}_queue. Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index a248cf1c98dd..2a438a93f723 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -424,7 +424,6 @@ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } -EXPORT_SYMBOL(blk_put_queue); void blk_cleanup_queue(struct request_queue *q) { @@ -592,7 +591,6 @@ int blk_get_queue(struct request_queue *q) return 1; } -EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { From bec419404afe8b0d60000118ca90ada4c69a3a6d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:48 +0100 Subject: [PATCH 11/17] unexport blk_rq_map_user_iov This patch removes the unused export of blk_rq_map_user_iov. Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/blk-map.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-map.c b/block/blk-map.c index 4e17dfd0035d..0760963546dd 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -217,7 +217,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, rq->buffer = rq->data = NULL; return 0; } -EXPORT_SYMBOL(blk_rq_map_user_iov); /** * blk_rq_unmap_user - unmap a request with user data From a0db701a6bf767320e4471bd55e70702d230f6fb Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:50 +0100 Subject: [PATCH 12/17] block/genhd.c: proper externs This patch adds proper externs for two structs in include/linux/genhd.h Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- fs/proc/proc_misc.c | 3 +-- include/linux/genhd.h | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 468805d40e2b..2d563979cb02 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -377,7 +378,6 @@ static int stram_read_proc(char *page, char **start, off_t off, #endif #ifdef CONFIG_BLOCK -extern const struct seq_operations partitions_op; static int partitions_open(struct inode *inode, struct file *file) { return seq_open(file, &partitions_op); @@ -389,7 +389,6 @@ static const struct file_operations proc_partitions_operations = { .release = seq_release, }; -extern const struct seq_operations diskstats_op; static int diskstats_open(struct inode *inode, struct file *file) { return seq_open(file, &diskstats_op); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index cd048e3cc96d..32c2ac49a070 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -22,6 +22,9 @@ extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; +extern const struct seq_operations partitions_op; +extern const struct seq_operations diskstats_op; + enum { /* These three have identical behaviour; use the second one if DOS FDISK gets confused about extended/logical partitions starting past cylinder 1023. */ From ecb80c6a490863af569853eea2a925f97e9e856a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:51 +0100 Subject: [PATCH 13/17] make cdrom.c:check_for_audio_disc() static This patch makes the needlessly global check_for_audio_disc() static. Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index db259e60289b..12f5baea439b 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1152,8 +1152,8 @@ clean_up_and_return: /* This code is similar to that in open_for_data. The routine is called whenever an audio play operation is requested. */ -int check_for_audio_disc(struct cdrom_device_info * cdi, - struct cdrom_device_ops * cdo) +static int check_for_audio_disc(struct cdrom_device_info * cdi, + struct cdrom_device_ops * cdo) { int ret; tracktype tracks; From 68d95b585f1b67b3c89ce0eb934e221ebeeb5c61 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Tue, 4 Mar 2008 11:25:15 +0100 Subject: [PATCH 14/17] cciss: remove READ_AHEAD define and use block layer defaults This patch removes the #define READ_AHEAD 1024 from the driver and uses the block layer defaults, instead. We have found that under certain workloads the setting can cause a disk connected to the e200 controller to go offline. If the disk hiccups the link may try to downshift but the controller is never notified that the link successfully completed the renegotiation. We've also found that performance using the block layer default of 32 pages was on par with the 1024 setting. We tried setting it to zero at one time based on info from our firmware guys but that killed performance. Turns out we were talking about 2 different read ahead settings. Please consider this for inclusion. Signed-off-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index f1e7390683fd..55bd35c0f082 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -132,7 +132,6 @@ static struct board_type products[] = { /*define how many times we will try a command because of bus resets */ #define MAX_CMD_RETRIES 3 -#define READ_AHEAD 1024 #define MAX_CTLR 32 /* Originally cciss driver only supports 8 major numbers */ @@ -1402,7 +1401,6 @@ geo_inq: disk->private_data = &h->drv[drv_index]; /* Set up queue information */ - disk->queue->backing_dev_info.ra_pages = READ_AHEAD; blk_queue_bounce_limit(disk->queue, hba[ctlr]->pdev->dma_mask); /* This is a hardware imposed limit. */ @@ -3495,7 +3493,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, } drv->queue = q; - q->backing_dev_info.ra_pages = READ_AHEAD; blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); /* This is a hardware imposed limit. */ From 448da4d262b5db90817ce853726ff4d9b0c2bf48 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 4 Mar 2008 11:30:18 +0100 Subject: [PATCH 15/17] block: remove extern on function definition Intoduced between 2.6.25-rc2 and -rc3 block/blk-settings.c:319:12: warning: function 'blk_queue_dma_drain' with external linkage has definition Signed-off-by: Harvey Harrison Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 18fab5193d81..1344a0ea5cc6 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -332,7 +332,7 @@ EXPORT_SYMBOL(blk_queue_dma_pad); * device can support otherwise there won't be room for the drain * buffer. */ -extern int blk_queue_dma_drain(struct request_queue *q, +int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size) { From 56d94a37f63ad1c9da3bc8e903f79d0ee1e80170 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 4 Mar 2008 11:31:22 +0100 Subject: [PATCH 16/17] block: fix shadowed variable warning in blk-map.c Introduced between 2.6.25-rc2 and -rc3 block/blk-map.c:154:14: warning: symbol 'bio' shadows an earlier one block/blk-map.c:110:13: originally declared here Signed-off-by: Harvey Harrison Signed-off-by: Jens Axboe --- block/blk-map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 0760963546dd..c07d9c8317f4 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -152,10 +152,10 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, */ if (len & q->dma_pad_mask) { unsigned int pad_len = (q->dma_pad_mask & ~len) + 1; - struct bio *bio = rq->biotail; + struct bio *tail = rq->biotail; - bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len; - bio->bi_size += pad_len; + tail->bi_io_vec[tail->bi_vcnt - 1].bv_len += pad_len; + tail->bi_size += pad_len; rq->extra_len += pad_len; } From cc66b4512cae8df4ed1635483210aabf7690ec27 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 4 Mar 2008 11:47:46 +0100 Subject: [PATCH 17/17] block: fix blkdev_issue_flush() not detecting and passing EOPNOTSUPP back This is important to eg dm, that tries to decide whether to stop using barriers or not. Tested as working by Anders Henke Signed-off-by: Jens Axboe --- block/blk-barrier.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6901eedeffce..55c5f1fc4f1f 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -259,8 +259,11 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp) static void bio_end_empty_barrier(struct bio *bio, int err) { - if (err) + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); clear_bit(BIO_UPTODATE, &bio->bi_flags); + } complete(bio->bi_private); } @@ -309,7 +312,9 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) *error_sector = bio->bi_sector; ret = 0; - if (!bio_flagged(bio, BIO_UPTODATE)) + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; bio_put(bio);