From e9ef46369f5107e634a93b7fc4e62a1f53343197 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 24 Jul 2012 15:01:04 -0600 Subject: [PATCH 01/91] NVMe: Set request queue logical block size Sets the request queue logical block size with the block size of the namespace. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index f4996b0e4b1a..38b9c73f6706 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1344,6 +1344,7 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, ns->disk = disk; lbaf = id->flbas & 0xf; ns->lba_shift = id->lbaf[lbaf].ds; + blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); disk->major = nvme_major; disk->minors = NVME_MINORS; From 5c42ea1643a630060f9e71e06d3933d244970967 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 25 Jul 2012 16:05:18 -0600 Subject: [PATCH 02/91] NVMe: Fix nvme module init when nvme_major is set register_blkdev returns 0 when given a valid major number. Reported-by:Ross Zwisler Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 38b9c73f6706..46e33eec6298 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1716,9 +1716,11 @@ static int __init nvme_init(void) if (IS_ERR(nvme_thread)) return PTR_ERR(nvme_thread); - nvme_major = register_blkdev(nvme_major, "nvme"); - if (nvme_major <= 0) + result = register_blkdev(nvme_major, "nvme"); + if (result < 0) goto kill_kthread; + else if (result > 0) + nvme_major = result; result = pci_register_driver(&nvme_driver); if (result) From 50af8baec46a99a9b81a4600c0374f83a5a590a9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 25 Jul 2012 16:07:55 -0600 Subject: [PATCH 03/91] NVMe: replace nvme_ns with nvme_dev for user admin The function nvme_user_admin_command does not require a namespace to proceed. Replace with the nvme_dev structure so that it can be called from contexts that do not have a namespace. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 46e33eec6298..89935853cb85 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1151,10 +1151,9 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return status; } -static int nvme_user_admin_cmd(struct nvme_ns *ns, +static int nvme_user_admin_cmd(struct nvme_dev *dev, struct nvme_admin_cmd __user *ucmd) { - struct nvme_dev *dev = ns->dev; struct nvme_admin_cmd cmd; struct nvme_command c; int status, length; @@ -1209,7 +1208,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, case NVME_IOCTL_ID: return ns->ns_id; case NVME_IOCTL_ADMIN_CMD: - return nvme_user_admin_cmd(ns, (void __user *)arg); + return nvme_user_admin_cmd(ns->dev, (void __user *)arg); case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, (void __user *)arg); default: From a42ceccef0c43b46ff6bc1b12a7c1076ef243df1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 25 Jul 2012 16:06:38 -0600 Subject: [PATCH 04/91] NVMe: use namespace id for nvme_get_features The specification does not provide a use for command dword11 in the NVMe Get Features command, but does use the NSID for some features. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 89935853cb85..7bcd88205a41 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -840,15 +840,15 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, } static int nvme_get_features(struct nvme_dev *dev, unsigned fid, - unsigned dword11, dma_addr_t dma_addr) + unsigned nsid, dma_addr_t dma_addr) { struct nvme_command c; memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; + c.features.nsid = cpu_to_le32(nsid); c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - c.features.dword11 = cpu_to_le32(dword11); return nvme_submit_admin_cmd(dev, &c, NULL); } From 8fc23e032debd682f5ba9fc524a5846c10d2c522 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 26 Jul 2012 11:29:57 -0600 Subject: [PATCH 05/91] NVMe: Set block queue max sectors Set the max hw sectors in a namespace's request queue if the nvme device has a max data transfer size. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 7 +++++++ include/linux/nvme.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 7bcd88205a41..11951fa11a90 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -78,6 +78,7 @@ struct nvme_dev { char serial[20]; char model[40]; char firmware_rev[8]; + u32 max_hw_sectors; }; /* @@ -1344,6 +1345,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, lbaf = id->flbas & 0xf; ns->lba_shift = id->lbaf[lbaf].ds; blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); + if (dev->max_hw_sectors) + blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); disk->major = nvme_major; disk->minors = NVME_MINORS; @@ -1485,6 +1488,10 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev) memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); + if (ctrl->mdts) { + int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; + dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); + } id_ns = mem; for (i = 1; i <= nn; i++) { diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9490a00529f4..8c71d2004c6d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -37,6 +37,7 @@ struct nvme_bar { #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) +#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) enum { NVME_CC_ENABLE = 1 << 0, From a0cadb85b8b758608ae0759151e29de7581c6731 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 27 Jul 2012 13:57:23 -0400 Subject: [PATCH 06/91] NVMe: Do not set IO queue depth beyond device max Set the depth for IO queues to the device's maximum supported queue entries if the requested depth exceeds the device's capabilities. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 10 ++++++---- include/linux/nvme.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 11951fa11a90..af1ef39bd6b4 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -893,7 +893,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth, int vector) { struct device *dmadev = &dev->pci_dev->dev; - unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info)); + unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * + sizeof(struct nvme_cmd_info)); struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); if (!nvmeq) return NULL; @@ -1391,7 +1392,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) { - int result, cpu, i, nr_io_queues, db_bar_size; + int result, cpu, i, nr_io_queues, db_bar_size, q_depth; nr_io_queues = num_online_cpus(); result = set_queue_count(dev, nr_io_queues); @@ -1437,9 +1438,10 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) cpu = cpumask_next(cpu, cpu_online_mask); } + q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, + NVME_Q_DEPTH); for (i = 0; i < nr_io_queues; i++) { - dev->queues[i + 1] = nvme_create_queue(dev, i + 1, - NVME_Q_DEPTH, i); + dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); if (IS_ERR(dev->queues[i + 1])) return PTR_ERR(dev->queues[i + 1]); dev->queue_count++; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8c71d2004c6d..c25cccaa555a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -35,6 +35,7 @@ struct nvme_bar { __u64 acq; /* Admin CQ Base Address */ }; +#define NVME_CAP_MQES(cap) ((cap) & 0xffff) #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) From c7d36ab8fa04c213328119a9c0d66985fe204ee5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 27 Jul 2012 11:53:28 -0600 Subject: [PATCH 07/91] NVMe: Fix uninitialized iod compiler warning Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index af1ef39bd6b4..6c0eb768562f 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1159,7 +1159,7 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, struct nvme_admin_cmd cmd; struct nvme_command c; int status, length; - struct nvme_iod *iod; + struct nvme_iod *uninitialized_var(iod); if (!capable(CAP_SYS_ADMIN)) return -EACCES; From 22fff826e715e9727d3c7a69f15e602a9801b673 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 20 Jan 2012 07:55:30 -0500 Subject: [PATCH 08/91] NVMe: handle allocation failure in nvme_map_user_pages() We should return here and avoid a NULL dereference. Signed-off-by: Dan Carpenter Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 6c0eb768562f..064e86a6bb4e 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1044,6 +1044,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, offset = offset_in_page(addr); count = DIV_ROUND_UP(offset + length, PAGE_SIZE); pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); + if (!pages) + return ERR_PTR(-ENOMEM); err = get_user_pages_fast(addr, count, 1, pages); if (err < count) { From 0ac13140d796eb1e2f8956aea97a6e5e4ebcf981 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 31 Jul 2012 13:31:15 -0400 Subject: [PATCH 09/91] NVMe: Fix whitespace damage in nvme_init Commit 5c42ea1643 used spaces instead of tabs. Also remove the unnecessary initialisation of the 'result' variable. Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 064e86a6bb4e..0ba6b7cb344b 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1720,7 +1720,7 @@ static struct pci_driver nvme_driver = { static int __init nvme_init(void) { - int result = -EBUSY; + int result; nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); if (IS_ERR(nvme_thread)) @@ -1730,7 +1730,7 @@ static int __init nvme_init(void) if (result < 0) goto kill_kthread; else if (result > 0) - nvme_major = result; + nvme_major = result; result = pci_register_driver(&nvme_driver); if (result) From cd58ad7d188c643cf572b038909c2f7dd96fdafe Mon Sep 17 00:00:00 2001 From: Quoc-Son Anh Date: Tue, 21 Feb 2012 16:50:53 -0700 Subject: [PATCH 10/91] NVMe: Use ida for nvme device instance Signed-off-by: Quoc-Son Anh Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 0ba6b7cb344b..3278fbdb8dc0 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1576,15 +1576,33 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) dma_pool_destroy(dev->prp_small_pool); } -/* XXX: Use an ida or something to let remove / add work correctly */ -static void nvme_set_instance(struct nvme_dev *dev) +static DEFINE_IDA(nvme_instance_ida); + +static int nvme_set_instance(struct nvme_dev *dev) { - static int instance; - dev->instance = instance++; + int instance, error; + + do { + if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) + return -ENODEV; + + spin_lock(&dev_list_lock); + error = ida_get_new(&nvme_instance_ida, &instance); + spin_unlock(&dev_list_lock); + } while (error == -EAGAIN); + + if (error) + return -ENODEV; + + dev->instance = instance; + return 0; } static void nvme_release_instance(struct nvme_dev *dev) { + spin_lock(&dev_list_lock); + ida_remove(&nvme_instance_ida, dev->instance); + spin_unlock(&dev_list_lock); } static int __devinit nvme_probe(struct pci_dev *pdev, @@ -1617,7 +1635,10 @@ static int __devinit nvme_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - nvme_set_instance(dev); + result = nvme_set_instance(dev); + if (result) + goto disable; + dev->entry[0].vector = pdev->irq; result = nvme_setup_prp_pools(dev); From 9e866774aab5d2654b0fa8f97890f68913f05700 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Aug 2012 13:55:56 -0400 Subject: [PATCH 11/91] NVMe: Free admin queue memory on initialisation failure If the adapter fails initialisation, the memory allocated for the admin queue may not be freed. Split the memory freeing part of nvme_free_queue() into nvme_free_queue_mem() and call it in the case of initialisation failure. Signed-off-by: Matthew Wilcox Reported-by: Vishal Verma --- drivers/block/nvme.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 3278fbdb8dc0..214037055e2a 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -868,6 +868,15 @@ static int nvme_set_features(struct nvme_dev *dev, unsigned fid, return nvme_submit_admin_cmd(dev, &c, result); } +static void nvme_free_queue_mem(struct nvme_queue *nvmeq) +{ + dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), + (void *)nvmeq->cqes, nvmeq->cq_dma_addr); + dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), + nvmeq->sq_cmds, nvmeq->sq_dma_addr); + kfree(nvmeq); +} + static void nvme_free_queue(struct nvme_dev *dev, int qid) { struct nvme_queue *nvmeq = dev->queues[qid]; @@ -882,11 +891,7 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) adapter_delete_cq(dev, qid); } - dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), - (void *)nvmeq->cqes, nvmeq->cq_dma_addr); - dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), - nvmeq->sq_cmds, nvmeq->sq_dma_addr); - kfree(nvmeq); + nvme_free_queue_mem(nvmeq); } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, @@ -982,7 +987,7 @@ static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) { - int result; + int result = 0; u32 aqa; u64 cap; unsigned long timeout; @@ -1012,17 +1017,22 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; dev->db_stride = NVME_CAP_STRIDE(cap); - while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { + while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { msleep(100); if (fatal_signal_pending(current)) - return -EINTR; + result = -EINTR; if (time_after(jiffies, timeout)) { dev_err(&dev->pci_dev->dev, "Device not ready; aborting initialisation\n"); - return -ENODEV; + result = -ENODEV; } } + if (result) { + nvme_free_queue_mem(nvmeq); + return result; + } + result = queue_request_irq(dev, nvmeq, "nvme admin"); dev->queues[0] = nvmeq; return result; From a09115b23e2002bb35b7bfd337683f00875671ec Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 7 Aug 2012 15:56:23 -0400 Subject: [PATCH 12/91] NVMe: Cancel outstanding IOs on queue deletion If the device is hot-unplugged while there are active commands, we should time out the I/Os so that upper layers don't just see the I/Os disappear. Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 55 ++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 214037055e2a..f9ad514c9227 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -868,6 +868,33 @@ static int nvme_set_features(struct nvme_dev *dev, unsigned fid, return nvme_submit_admin_cmd(dev, &c, result); } +/** + * nvme_cancel_ios - Cancel outstanding I/Os + * @queue: The queue to cancel I/Os on + * @timeout: True to only cancel I/Os which have timed out + */ +static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) +{ + int depth = nvmeq->q_depth - 1; + struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); + unsigned long now = jiffies; + int cmdid; + + for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { + void *ctx; + nvme_completion_fn fn; + static struct nvme_completion cqe = { + .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, + }; + + if (timeout && !time_after(now, info[cmdid].timeout)) + continue; + dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); + ctx = cancel_cmdid(nvmeq, cmdid, &fn); + fn(nvmeq->dev, ctx, &cqe); + } +} + static void nvme_free_queue_mem(struct nvme_queue *nvmeq) { dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), @@ -882,6 +909,10 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) struct nvme_queue *nvmeq = dev->queues[qid]; int vector = dev->entry[nvmeq->cq_vector].vector; + spin_lock_irq(&nvmeq->q_lock); + nvme_cancel_ios(nvmeq, false); + spin_unlock_irq(&nvmeq->q_lock); + irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); @@ -1236,26 +1267,6 @@ static const struct block_device_operations nvme_fops = { .compat_ioctl = nvme_ioctl, }; -static void nvme_timeout_ios(struct nvme_queue *nvmeq) -{ - int depth = nvmeq->q_depth - 1; - struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); - unsigned long now = jiffies; - int cmdid; - - for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { - void *ctx; - nvme_completion_fn fn; - static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, }; - - if (!time_after(now, info[cmdid].timeout)) - continue; - dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid); - ctx = cancel_cmdid(nvmeq, cmdid, &fn); - fn(nvmeq->dev, ctx, &cqe); - } -} - static void nvme_resubmit_bios(struct nvme_queue *nvmeq) { while (bio_list_peek(&nvmeq->sq_cong)) { @@ -1287,7 +1298,7 @@ static int nvme_kthread(void *data) spin_lock_irq(&nvmeq->q_lock); if (nvme_process_cq(nvmeq)) printk("process_cq did something\n"); - nvme_timeout_ios(nvmeq); + nvme_cancel_ios(nvmeq, true); nvme_resubmit_bios(nvmeq); spin_unlock_irq(&nvmeq->q_lock); } @@ -1549,8 +1560,6 @@ static int nvme_dev_remove(struct nvme_dev *dev) list_del(&dev->node); spin_unlock(&dev_list_lock); - /* TODO: wait all I/O finished or cancel them */ - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { list_del(&ns->list); del_gendisk(ns->disk); From 3d1cbdd6aefff711bcf389fdabc4af9bc22e8201 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 29 Aug 2012 10:02:08 +0200 Subject: [PATCH 13/91] Bluetooth: mgmt: Fix enabling SSP while powered off When new BT USB adapter is plugged in it's configured while still being powered off (HCI_AUTO_OFF flag is set), thus Set SSP will only set dev_flags but won't write changes to controller. As a result remote devices won't use Secure Simple Pairing with our device due to SSP Host Support flag disabled in extended features and may also reject SSP attempt from our side (with possible fallback to legacy pairing). This patch ensures HCI Write Simple Pairing Mode is sent when Set Powered is called to power on controller and clear HCI_AUTO_OFF flag. Signed-off-by: Andrzej Kaczmarek Cc: stable@vger.kernel.org Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ad6613d17ca6..f943bbfc9c61 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2875,6 +2875,12 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) if (scan) hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + u8 ssp = 1; + + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); + } + update_class(hdev); update_name(hdev, hdev->dev_name); update_eir(hdev); From 562fcc246ebe31ade6e1be08585673b9b2785498 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 29 Aug 2012 10:02:09 +0200 Subject: [PATCH 14/91] Bluetooth: mgmt: Fix enabling LE while powered off When new BT USB adapter is plugged in it's configured while still being powered off (HCI_AUTO_OFF flag is set), thus Set LE will only set dev_flags but won't write changes to controller. As a result it's not possible to start device discovery session on LE controller as it uses interleaved discovery which requires LE Supported Host flag in extended features. This patch ensures HCI Write LE Host Supported is sent when Set Powered is called to power on controller and clear HCI_AUTO_OFF flag. Signed-off-by: Andrzej Kaczmarek Cc: stable@vger.kernel.org Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f943bbfc9c61..eba022de3c20 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2881,6 +2881,16 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); } + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 1; + cp.simul = !!(hdev->features[6] & LMP_SIMUL_LE_BR); + + hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + update_class(hdev); update_name(hdev, hdev->dev_name); update_eir(hdev); From aad3d0e343900a4c2c5dbc73f76550aa64a0ac1b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 6 Sep 2012 15:05:42 +0300 Subject: [PATCH 15/91] Bluetooth: Fix freeing uninitialized delayed works When releasing L2CAP socket which is in BT_CONFIG state l2cap_chan_close invokes l2cap_send_disconn_req which cancel delayed works which are only set in BT_CONNECTED state with l2cap_ertm_init. Add state check before cancelling those works. ... [ 9668.574372] [21085] l2cap_sock_release: sock cd065200, sk f073e800 [ 9668.574399] [21085] l2cap_sock_shutdown: sock cd065200, sk f073e800 [ 9668.574411] [21085] l2cap_chan_close: chan f073ec00 state BT_CONFIG sk f073e800 [ 9668.574421] [21085] l2cap_send_disconn_req: chan f073ec00 conn ecc16600 [ 9668.574441] INFO: trying to register non-static key. [ 9668.574443] the code is fine but needs lockdep annotation. [ 9668.574446] turning off the locking correctness validator. [ 9668.574450] Pid: 21085, comm: obex-client Tainted: G O 3.5.0+ #57 [ 9668.574452] Call Trace: [ 9668.574463] [] __lock_acquire+0x12e3/0x1700 [ 9668.574468] [] ? trace_hardirqs_on+0xb/0x10 [ 9668.574476] [] ? printk+0x4d/0x4f [ 9668.574479] [] lock_acquire+0x88/0x130 [ 9668.574487] [] ? try_to_del_timer_sync+0x60/0x60 [ 9668.574491] [] del_timer_sync+0x50/0xc0 [ 9668.574495] [] ? try_to_del_timer_sync+0x60/0x60 [ 9668.574515] [] l2cap_send_disconn_req+0xe3/0x160 [bluetooth] ... Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4ea1710a4783..38c00f142203 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1008,7 +1008,7 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c if (!conn) return; - if (chan->mode == L2CAP_MODE_ERTM) { + if (chan->mode == L2CAP_MODE_ERTM && chan->state == BT_CONNECTED) { __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); From 78c04c0bf52360dc2f7185e99c8e9aa05d73ae5a Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 14 Sep 2012 16:34:46 -0300 Subject: [PATCH 16/91] Bluetooth: Fix not removing power_off delayed work For example, when a usb reset is received (I could reproduce it running something very similar to this[1] in a loop) it could be that the device is unregistered while the power_off delayed work is still scheduled to run. Backtrace: WARNING: at lib/debugobjects.c:261 debug_print_object+0x7c/0x8d() Hardware name: To Be Filled By O.E.M. ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x26 Modules linked in: nouveau mxm_wmi btusb wmi bluetooth ttm coretemp drm_kms_helper Pid: 2114, comm: usb-reset Not tainted 3.5.0bt-next #2 Call Trace: [] ? free_obj_work+0x57/0x91 [] warn_slowpath_common+0x7e/0x97 [] warn_slowpath_fmt+0x41/0x43 [] debug_print_object+0x7c/0x8d [] ? __queue_work+0x259/0x259 [] ? debug_check_no_obj_freed+0x6f/0x1b5 [] debug_check_no_obj_freed+0x98/0x1b5 [] ? bt_host_release+0x10/0x1e [bluetooth] [] kfree+0x90/0xe6 [] bt_host_release+0x10/0x1e [bluetooth] [] device_release+0x4a/0x7e [] kobject_release+0x11d/0x154 [] kobject_put+0x4a/0x4f [] put_device+0x12/0x14 [] hci_free_dev+0x22/0x26 [bluetooth] [] btusb_disconnect+0x96/0x9f [btusb] [] usb_unbind_interface+0x57/0x106 [] __device_release_driver+0x83/0xd6 [] device_release_driver+0x20/0x2d [] usb_driver_release_interface+0x44/0x7b [] usb_forced_unbind_intf+0x45/0x4e [] usb_reset_device+0xa6/0x12e [] usbdev_do_ioctl+0x319/0xe20 [] ? avc_has_perm_flags+0xc9/0x12e [] ? avc_has_perm_flags+0x25/0x12e [] ? do_page_fault+0x31e/0x3a1 [] usbdev_ioctl+0x9/0xd [] vfs_ioctl+0x21/0x34 [] do_vfs_ioctl+0x408/0x44b [] ? file_has_perm+0x76/0x81 [] sys_ioctl+0x51/0x76 [] system_call_fastpath+0x16/0x1b [1] http://cpansearch.perl.org/src/DPAVLIN/Biblio-RFID-0.03/examples/usbreset.c Signed-off-by: Vinicius Costa Gomes Cc: stable@vger.kernel.org Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4de5db18d5a..0b997c8f9655 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -734,6 +734,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_work_sync(&hdev->le_scan); + cancel_delayed_work(&hdev->power_off); + hci_req_cancel(hdev, ENODEV); hci_req_lock(hdev); From a85d0d7f3460b1a123b78e7f7e39bf72c37dfb78 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 14 Sep 2012 15:36:57 -0700 Subject: [PATCH 17/91] cfg80211: fix possible circular lock on reg_regdb_search() When call_crda() is called we kick off a witch hunt search for the same regulatory domain on our internal regulatory database and that work gets kicked off on a workqueue, this is done while the cfg80211_mutex is held. If that workqueue kicks off it will first lock reg_regdb_search_mutex and later cfg80211_mutex but to ensure two CPUs will not contend against cfg80211_mutex the right thing to do is to have the reg_regdb_search() wait until the cfg80211_mutex is let go. The lockdep report is pasted below. cfg80211: Calling CRDA to update world regulatory domain ====================================================== [ INFO: possible circular locking dependency detected ] 3.3.8 #3 Tainted: G O ------------------------------------------------------- kworker/0:1/235 is trying to acquire lock: (cfg80211_mutex){+.+...}, at: [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] but task is already holding lock: (reg_regdb_search_mutex){+.+...}, at: [<81646828>] set_regdom+0x710/0x808 [cfg80211] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (reg_regdb_search_mutex){+.+...}: [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<81645778>] is_world_regdom+0x9f8/0xc74 [cfg80211] -> #1 (reg_mutex#2){+.+...}: [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<8164539c>] is_world_regdom+0x61c/0xc74 [cfg80211] -> #0 (cfg80211_mutex){+.+...}: [<800a77b8>] __lock_acquire+0x10d4/0x17bc [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] other info that might help us debug this: Chain exists of: cfg80211_mutex --> reg_mutex#2 --> reg_regdb_search_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(reg_regdb_search_mutex); lock(reg_mutex#2); lock(reg_regdb_search_mutex); lock(cfg80211_mutex); *** DEADLOCK *** 3 locks held by kworker/0:1/235: #0: (events){.+.+..}, at: [<80089a00>] process_one_work+0x230/0x460 #1: (reg_regdb_work){+.+...}, at: [<80089a00>] process_one_work+0x230/0x460 #2: (reg_regdb_search_mutex){+.+...}, at: [<81646828>] set_regdom+0x710/0x808 [cfg80211] stack backtrace: Call Trace: [<80290fd4>] dump_stack+0x8/0x34 [<80291bc4>] print_circular_bug+0x2ac/0x2d8 [<800a77b8>] __lock_acquire+0x10d4/0x17bc [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] Reported-by: Felix Fietkau Tested-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2ded3c7fad06..72d170ca3406 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -350,6 +350,9 @@ static void reg_regdb_search(struct work_struct *work) struct reg_regdb_search_request *request; const struct ieee80211_regdomain *curdom, *regdom; int i, r; + bool set_reg = false; + + mutex_lock(&cfg80211_mutex); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -365,9 +368,7 @@ static void reg_regdb_search(struct work_struct *work) r = reg_copy_regd(®dom, curdom); if (r) break; - mutex_lock(&cfg80211_mutex); - set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + set_reg = true; break; } } @@ -375,6 +376,11 @@ static void reg_regdb_search(struct work_struct *work) kfree(request); } mutex_unlock(®_regdb_search_mutex); + + if (set_reg) + set_regdom(regdom); + + mutex_unlock(&cfg80211_mutex); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); From a7be50b7e30f9d77cb059a7ffdb781bb0fb92eba Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 18 Sep 2012 19:48:59 +0200 Subject: [PATCH 18/91] iwlwifi: don't double free the interrupt in failure path When the driver can't get the HW ready, we would release the interrupt twice which made the kernel complain loudly. Cc: stable@vger.kernel.org Reported-by: Brian Cockrell Tested-by: Brian Cockrell Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/pcie/trans.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 1e86ea2266d4..dbeebef562d5 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1442,6 +1442,7 @@ static int iwl_trans_pcie_start_hw(struct iwl_trans *trans) return err; err_free_irq: + trans_pcie->irq_requested = false; free_irq(trans_pcie->irq, trans); error: iwl_free_isr_ict(trans); From b37b593e20a073dce5c7cbaf230f166db6425450 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Sep 2012 10:48:03 -0600 Subject: [PATCH 19/91] vfio: Trivial Documentation correction Signed-off-by: Alex Williamson --- Documentation/vfio.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 0cb6685c8029..8eda3635a17d 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -133,7 +133,7 @@ character devices for this group: $ lspci -n -s 0000:06:0d.0 06:0d.0 0401: 1102:0002 (rev 08) # echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind -# echo 1102 0002 > /sys/bus/pci/drivers/vfio/new_id +# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id Now we need to look at what other devices are in the group to free it for use by VFIO: From b68e7fa879cd3b1126a7c455d9da1b70299efc0d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Sep 2012 10:48:28 -0600 Subject: [PATCH 20/91] vfio: Fix virqfd release race vfoi-pci supports a mechanism like KVM's irqfd for unmasking an interrupt through an eventfd. There are two ways to shutdown this interface: 1) close the eventfd, 2) ioctl (such as disabling the interrupt). Both of these do the release through a workqueue, which can result in a segfault if two jobs get queued for the same virqfd. Fix this by protecting the pointer to these virqfds by a spinlock. The vfio pci device will therefore no longer have a reference to it once the release job is queued under lock. On the ioctl side, we still flush the workqueue to ensure that any outstanding releases are completed. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_intrs.c | 76 +++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 20 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 211a4920b88a..d8dedc7d3910 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -76,9 +76,24 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) schedule_work(&virqfd->inject); } - if (flags & POLLHUP) - /* The eventfd is closing, detach from VFIO */ - virqfd_deactivate(virqfd); + if (flags & POLLHUP) { + unsigned long flags; + spin_lock_irqsave(&virqfd->vdev->irqlock, flags); + + /* + * The eventfd is closing, if the virqfd has not yet been + * queued for release, as determined by testing whether the + * vdev pointer to it is still valid, queue it now. As + * with kvm irqfds, we know we won't race against the virqfd + * going away because we hold wqh->lock to get here. + */ + if (*(virqfd->pvirqfd) == virqfd) { + *(virqfd->pvirqfd) = NULL; + virqfd_deactivate(virqfd); + } + + spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); + } return 0; } @@ -93,7 +108,6 @@ static void virqfd_ptable_queue_proc(struct file *file, static void virqfd_shutdown(struct work_struct *work) { struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); - struct virqfd **pvirqfd = virqfd->pvirqfd; u64 cnt; eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); @@ -101,7 +115,6 @@ static void virqfd_shutdown(struct work_struct *work) eventfd_ctx_put(virqfd->eventfd); kfree(virqfd); - *pvirqfd = NULL; } static void virqfd_inject(struct work_struct *work) @@ -122,15 +135,11 @@ static int virqfd_enable(struct vfio_pci_device *vdev, int ret = 0; unsigned int events; - if (*pvirqfd) - return -EBUSY; - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); if (!virqfd) return -ENOMEM; virqfd->pvirqfd = pvirqfd; - *pvirqfd = virqfd; virqfd->vdev = vdev; virqfd->handler = handler; virqfd->thread = thread; @@ -153,6 +162,23 @@ static int virqfd_enable(struct vfio_pci_device *vdev, virqfd->eventfd = ctx; + /* + * virqfds can be released by closing the eventfd or directly + * through ioctl. These are both done through a workqueue, so + * we update the pointer to the virqfd under lock to avoid + * pushing multiple jobs to release the same virqfd. + */ + spin_lock_irq(&vdev->irqlock); + + if (*pvirqfd) { + spin_unlock_irq(&vdev->irqlock); + ret = -EBUSY; + goto fail; + } + *pvirqfd = virqfd; + + spin_unlock_irq(&vdev->irqlock); + /* * Install our own custom wake-up handling so we are notified via * a callback whenever someone signals the underlying eventfd. @@ -187,19 +213,29 @@ fail: fput(file); kfree(virqfd); - *pvirqfd = NULL; return ret; } -static void virqfd_disable(struct virqfd *virqfd) +static void virqfd_disable(struct vfio_pci_device *vdev, + struct virqfd **pvirqfd) { - if (!virqfd) - return; + unsigned long flags; - virqfd_deactivate(virqfd); + spin_lock_irqsave(&vdev->irqlock, flags); - /* Block until we know all outstanding shutdown jobs have completed. */ + if (*pvirqfd) { + virqfd_deactivate(*pvirqfd); + *pvirqfd = NULL; + } + + spin_unlock_irqrestore(&vdev->irqlock, flags); + + /* + * Block until we know all outstanding shutdown jobs have completed. + * Even if we don't queue the job, flush the wq to be sure it's + * been released. + */ flush_workqueue(vfio_irqfd_cleanup_wq); } @@ -392,8 +428,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - virqfd_disable(vdev->ctx[0].unmask); - virqfd_disable(vdev->ctx[0].mask); + virqfd_disable(vdev, &vdev->ctx[0].unmask); + virqfd_disable(vdev, &vdev->ctx[0].mask); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -539,8 +575,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); for (i = 0; i < vdev->num_ctx; i++) { - virqfd_disable(vdev->ctx[i].unmask); - virqfd_disable(vdev->ctx[i].mask); + virqfd_disable(vdev, &vdev->ctx[i].unmask); + virqfd_disable(vdev, &vdev->ctx[i].mask); } if (msix) { @@ -577,7 +613,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, vfio_send_intx_eventfd, NULL, &vdev->ctx[0].unmask, fd); - virqfd_disable(vdev->ctx[0].unmask); + virqfd_disable(vdev, &vdev->ctx[0].unmask); } return 0; From 340c7a2b2c9a2da640af28a8c196356484ac8b50 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 10 Aug 2012 13:12:07 -0700 Subject: [PATCH 21/91] rbd: drop dev reference on error in rbd_open() If a read-only rbd device is opened for writing in rbd_open(), it returns without dropping the just-acquired device reference. Fix this by moving the read-only check before getting the reference. Signed-off-by: Alex Elder Reviewed-by: Yehuda Sadeh Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9917943a3572..54a55f03115d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -246,13 +246,12 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) { struct rbd_device *rbd_dev = bdev->bd_disk->private_data; - rbd_get_dev(rbd_dev); - - set_device_ro(bdev, rbd_dev->read_only); - if ((mode & FMODE_WRITE) && rbd_dev->read_only) return -EROFS; + rbd_get_dev(rbd_dev); + set_device_ro(bdev, rbd_dev->read_only); + return 0; } From 5ce765a540f34d1e2005e1210f49f67fdf11e997 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 21 Sep 2012 17:59:58 -0500 Subject: [PATCH 22/91] libceph: only kunmap kmapped pages In write_partial_msg_pages(), pages need to be kmapped in order to perform a CRC-32c calculation on them. As an artifact of the way this code used to be structured, the kunmap() call was separated from the kmap() call and both were done conditionally. But the conditions under which the kmap() and kunmap() calls were made differed, so there was a chance a kunmap() call would be done on a page that had not been mapped. The symptom of this was tripping a BUG() in kunmap_high() when pkmap_count[nr] became 0. Reported-by: Bryan K. Wright Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 24c5eea8c45b..159aa8bef9e7 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1073,16 +1073,13 @@ static int write_partial_msg_pages(struct ceph_connection *con) BUG_ON(kaddr == NULL); base = kaddr + con->out_msg_pos.page_pos + bio_offset; crc = crc32c(crc, base, len); + kunmap(page); msg->footer.data_crc = cpu_to_le32(crc); con->out_msg_pos.did_page_crc = true; } ret = ceph_tcp_sendpage(con->sock, page, con->out_msg_pos.page_pos + bio_offset, len, 1); - - if (do_datacrc) - kunmap(page); - if (ret <= 0) goto out; From 9913b8c8f05b0aad97432900fa3b2cdfd557eeb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20H=C3=BCwe?= Date: Fri, 21 Sep 2012 16:44:18 +0000 Subject: [PATCH 23/91] net/phy/bcm87xx: Add MODULE_LICENSE("GPL") to GPL driver Currently the driver has no MODULE_LICENSE attribute in its source which results in a kernel taint if I load this: root@(none):~# modprobe bcm87xx bcm87xx: module license 'unspecified' taints kernel. Since the first lines of the source code clearly state: * This file is subject to the terms and conditions of the GNU General * Public License. See the file "COPYING" in the main directory of this * archive for more details. I think it's safe to add the MODULE_LICENSE("GPL") macro and thus remove the kernel taint. Cc: stable@vger.kernel.org Signed-off-by: Peter Huewe Signed-off-by: David S. Miller --- drivers/net/phy/bcm87xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index 2346b38b9837..799789518e87 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -229,3 +229,5 @@ static void __exit bcm87xx_exit(void) ARRAY_SIZE(bcm87xx_driver)); } module_exit(bcm87xx_exit); + +MODULE_LICENSE("GPL"); From ab43ed8b7490cb387782423ecf74aeee7237e591 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 22 Sep 2012 00:08:29 +0000 Subject: [PATCH 24/91] ipv4: raw: fix icmp_filter() icmp_filter() should not modify its input, or else its caller would need to recompute ip_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff0f071969ea..d23c6571ba1c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,18 +131,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; + struct icmphdr _hdr; + const struct icmphdr *hdr; - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ From 0b121fd28d071c766dddab743031ba6cfe9be68e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 22 Sep 2012 07:07:53 +0000 Subject: [PATCH 25/91] team: send port changed when added On some hw, link is not up during adding iface to team. That causes event not being sent to userspace and that may cause confusion. Fix this bug by sending port changed event once it's added to team. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 341b65dbbcd3..3ffe8a6e3c8b 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -848,7 +848,7 @@ static struct netpoll_info *team_netpoll_info(struct team *team) } #endif -static void __team_port_change_check(struct team_port *port, bool linkup); +static void __team_port_change_port_added(struct team_port *port, bool linkup); static int team_port_add(struct team *team, struct net_device *port_dev) { @@ -948,7 +948,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) team_port_enable(team, port); list_add_tail_rcu(&port->list, &team->port_list); __team_compute_features(team); - __team_port_change_check(port, !!netif_carrier_ok(port_dev)); + __team_port_change_port_added(port, !!netif_carrier_ok(port_dev)); __team_options_change_check(team); netdev_info(dev, "Port device %s added\n", portname); @@ -983,6 +983,8 @@ err_set_mtu: return err; } +static void __team_port_change_port_removed(struct team_port *port); + static int team_port_del(struct team *team, struct net_device *port_dev) { struct net_device *dev = team->dev; @@ -999,8 +1001,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev) __team_option_inst_mark_removed_port(team, port); __team_options_change_check(team); __team_option_inst_del_port(team, port); - port->removed = true; - __team_port_change_check(port, false); + __team_port_change_port_removed(port); team_port_disable(team, port); list_del_rcu(&port->list); netdev_rx_handler_unregister(port_dev); @@ -2251,13 +2252,11 @@ static void __team_options_change_check(struct team *team) } /* rtnl lock is held */ -static void __team_port_change_check(struct team_port *port, bool linkup) + +static void __team_port_change_send(struct team_port *port, bool linkup) { int err; - if (!port->removed && port->state.linkup == linkup) - return; - port->changed = true; port->state.linkup = linkup; team_refresh_port_linkup(port); @@ -2282,6 +2281,23 @@ send_event: } +static void __team_port_change_check(struct team_port *port, bool linkup) +{ + if (port->state.linkup != linkup) + __team_port_change_send(port, linkup); +} + +static void __team_port_change_port_added(struct team_port *port, bool linkup) +{ + __team_port_change_send(port, linkup); +} + +static void __team_port_change_port_removed(struct team_port *port) +{ + port->removed = true; + __team_port_change_send(port, false); +} + static void team_port_change_check(struct team_port *port, bool linkup) { struct team *team = port->team; From 2b018d57ff18e5405823e5cb59651a5b4d946d7b Mon Sep 17 00:00:00 2001 From: Xiaodong Xu Date: Sat, 22 Sep 2012 00:09:32 +0000 Subject: [PATCH 26/91] pppoe: drop PPPOX_ZOMBIEs in pppoe_release When PPPOE is running over a virtual ethernet interface (e.g., a bonding interface) and the user tries to delete the interface in case the PPPOE state is ZOMBIE, the kernel will loop forever while unregistering net_device for the reference count is not decreased to zero which should have been done with dev_put(). Signed-off-by: Xiaodong Xu Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index cbf7047decc0..20f31d0d1536 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -570,7 +570,7 @@ static int pppoe_release(struct socket *sock) po = pppox_sk(sk); - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } From 156cacb1d0d36b0d0582d9e798e58e0044f516b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Sep 2012 08:19:02 -0400 Subject: [PATCH 27/91] do_add_mount()/umount -l races normally we deal with lock_mount()/umount races by checking that mountpoint to be is still in our namespace after lock_mount() has been done. However, do_add_mount() skips that check when called with MNT_SHRINKABLE in flags (i.e. from finish_automount()). The reason is that ->mnt_ns may be a temporary namespace created exactly to contain automounts a-la NFS4 referral handling. It's not the namespace of the caller, though, so check_mnt() would fail here. We still need to check that ->mnt_ns is non-NULL in that case, though. Signed-off-by: Al Viro --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 4d31f73e2561..7bdf7907413f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) return err; err = -EINVAL; - if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt))) - goto unlock; + if (unlikely(!check_mnt(real_mount(path->mnt)))) { + /* that's acceptable only for automounts done in private ns */ + if (!(mnt_flags & MNT_SHRINKABLE)) + goto unlock; + /* ... and for those we'd better have mountpoint still alive */ + if (!real_mount(path->mnt)->mnt_ns) + goto unlock; + } /* Refuse the same filesystem on the same mount point */ err = -EBUSY; From c5aa1e554a20fb3542c62688ae46049c9225a965 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 29 Aug 2012 09:00:01 -0400 Subject: [PATCH 28/91] close the race in nlmsvc_free_block() we need to grab mutex before the reference counter reaches 0 Signed-off-by: Al Viro --- fs/lockd/svclock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index fb1a2bedbe97..8d80c990dffd 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref) dprintk("lockd: freeing block %p...\n", block); /* Remove block from file's list of blocks */ - mutex_lock(&file->f_mutex); list_del_init(&block->b_flist); mutex_unlock(&file->f_mutex); @@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref) static void nlmsvc_release_block(struct nlm_block *block) { if (block != NULL) - kref_put(&block->b_count, nlmsvc_free_block); + kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex); } /* From 40a3eb33e307616567f4b81792f405a7f3f0abee Mon Sep 17 00:00:00 2001 From: Def Date: Thu, 20 Sep 2012 14:56:13 +0200 Subject: [PATCH 29/91] batman-adv: Fix change mac address of soft iface. Into function interface_set_mac_addr, the function tt_local_add was invoked before updating dev->dev_addr. The new MAC address was not tagged as NoPurge. Signed-off-by: Def --- net/batman-adv/soft-interface.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 109ea2aae96c..21c53577c8d6 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -100,18 +100,21 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); struct sockaddr *addr = p; + uint8_t old_addr[ETH_ALEN]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + memcpy(old_addr, dev->dev_addr, ETH_ALEN); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) { - batadv_tt_local_remove(bat_priv, dev->dev_addr, + batadv_tt_local_remove(bat_priv, old_addr, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); } - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); dev->addr_assign_type &= ~NET_ADDR_RANDOM; return 0; } From 7caf69fb9c5017df01945a1861c042f6aa08edeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 18 Sep 2012 03:01:08 +0200 Subject: [PATCH 30/91] batman-adv: Fix symmetry check / route flapping in multi interface setups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If receiving an OGM from a neighbor other than the currently selected and if it has the same TQ then we are supposed to switch if this neighbor provides a more symmetric link than the currently selected one. However this symmetry check currently is broken if the interface of the neighbor we received the OGM from and the one of the currently selected neighbor differ: We are currently trying to determine the symmetry of the link towards the selected router via the link we received the OGM from instead of just checking via the link towards the currently selected router. This leads to way more route switches than necessary and can lead to permanent route flapping in many common multi interface setups. This patch fixes this issue by using the right interface for this symmetry check. Signed-off-by: Linus Lüssing --- net/batman-adv/bat_iv_ogm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index e877af8bdd1e..469daabd90c7 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -642,7 +642,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; struct hlist_node *node; - uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; + int if_num; + uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -727,17 +728,17 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (router && (neigh_node->tq_avg == router->tq_avg)) { orig_node_tmp = router->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_orig = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = router->if_incoming->if_num; + sum_orig = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); orig_node_tmp = neigh_node->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_neigh = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = neigh_node->if_incoming->if_num; + sum_neigh = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - if (bcast_own_sum_orig >= bcast_own_sum_neigh) + if (sum_orig >= sum_neigh) goto update_tt; } From cb13ff69d6d61ab06285e3ef652f45ecda73e135 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 24 Sep 2012 16:27:20 +1000 Subject: [PATCH 31/91] md/raid5: add missing spin_lock_init. commit b17459c05000fdbe8d10946570a26510f86ec0f raid5: add a per-stripe lock added a spin_lock to the 'stripe_head' struct. Unfortunately there are two places where this struct is allocated but the spin lock was only initialised in one of them. So add the missing spin_lock_init. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7031b865b3a0..0689173fd9f5 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1591,6 +1591,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) #ifdef CONFIG_MULTICORE_RAID456 init_waitqueue_head(&nsh->ops.wait_for_ops); #endif + spin_lock_init(&nsh->stripe_lock); list_add(&nsh->lru, &newstripes); } From ec10665cbf271fb1f60daeb194ad4f2cdcdc59d9 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 24 Sep 2012 08:35:03 +0200 Subject: [PATCH 32/91] ARM: dma-mapping: Fix potential memory leak in atomic_pool_init() When either of __alloc_from_contiguous or __alloc_remap_buffer fails to provide a valid pointer, allocated memory is freed up and an error is returned. 'pages' was however not freed before returning error. Cc: Arnd Bergmann Cc: Marek Szyprowski Signed-off-by: Sachin Kamat Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e59c4ab71bcb..13f555d62491 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -346,6 +346,8 @@ static int __init atomic_pool_init(void) (unsigned)pool->size / 1024); return 0; } + + kfree(pages); no_pages: kfree(bitmap); no_bitmap: From 8d54db795dfb1049d45dc34f0dddbc5347ec5642 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 17 Aug 2012 10:22:37 -0400 Subject: [PATCH 33/91] xen/boot: Disable NUMA for PV guests. The hypervisor is in charge of allocating the proper "NUMA" memory and dealing with the CPU scheduler to keep them bound to the proper NUMA node. The PV guests (and PVHVM) have no inkling of where they run and do not need to know that right now. In the future we will need to inject NUMA configuration data (if a guest spans two or more NUMA nodes) so that the kernel can make the right choices. But those patches are not yet present. In the meantime, disable the NUMA capability in the PV guest, which also fixes a bootup issue. Andre says: "we see Dom0 crashes due to the kernel detecting the NUMA topology not by ACPI, but directly from the northbridge (CONFIG_AMD_NUMA). This will detect the actual NUMA config of the physical machine, but will crash about the mismatch with Dom0's virtual memory. Variation of the theme: Dom0 sees what it's not supposed to see. This happens with the said config option enabled and on a machine where this scanning is still enabled (K8 and Fam10h, not Bulldozer class) We have this dump then: NUMA: Warning: node ids are out of bound, from=-1 to=-1 distance=10 Scanning NUMA topology in Northbridge 24 Number of physical nodes 4 Node 0 MemBase 0000000000000000 Limit 0000000040000000 Node 1 MemBase 0000000040000000 Limit 0000000138000000 Node 2 MemBase 0000000138000000 Limit 00000001f8000000 Node 3 MemBase 00000001f8000000 Limit 0000000238000000 Initmem setup node 0 0000000000000000-0000000040000000 NODE_DATA [000000003ffd9000 - 000000003fffffff] Initmem setup node 1 0000000040000000-0000000138000000 NODE_DATA [0000000137fd9000 - 0000000137ffffff] Initmem setup node 2 0000000138000000-00000001f8000000 NODE_DATA [00000001f095e000 - 00000001f0984fff] Initmem setup node 3 00000001f8000000-0000000238000000 Cannot find 159744 bytes in node 3 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __alloc_bootmem_node+0x43/0x96 Pid: 0, comm: swapper Not tainted 3.3.6 #1 AMD Dinar/Dinar RIP: e030:[] [] __alloc_bootmem_node+0x43/0x96 .. snip.. [] sparse_early_usemaps_alloc_node+0x64/0x178 [] sparse_init+0xe4/0x25a [] paging_init+0x13/0x22 [] setup_arch+0x9c6/0xa9b [] ? printk+0x3c/0x3e [] start_kernel+0xe5/0x468 [] x86_64_start_reservations+0xba/0xc1 [] ? xen_setup_runstate_info+0x2c/0x36 [] xen_start_kernel+0x565/0x56c " so we just disable NUMA scanning by setting numa_off=1. CC: stable@vger.kernel.org Reported-and-Tested-by: Andre Przywara Acked-by: Andre Przywara Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d11ca11d14fc..e2d62d697b5d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -544,4 +545,7 @@ void __init xen_arch_setup(void) disable_cpufreq(); WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); +#ifdef CONFIG_NUMA + numa_off = 1; +#endif } From b02d6175859dfbdecb5ea6562867092b5d69d64e Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 21 Sep 2012 14:35:49 -0400 Subject: [PATCH 34/91] c6x: use asm-generic/barrier.h A recent patch in the linux-next tree caused a build failure on C6X because C6X didn't define a read_barrier_depends() macro. C6X does not support SMP and the architecture doesn't provide any special memory ordering instructions, so it makes sense to just use the generic barrier.h rather than patching the existing c6x specific header. Signed-off-by: Mark Salter --- arch/c6x/include/asm/Kbuild | 1 + arch/c6x/include/asm/barrier.h | 27 --------------------------- 2 files changed, 1 insertion(+), 27 deletions(-) delete mode 100644 arch/c6x/include/asm/barrier.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 3af601e31e66..f08e89183cda 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm generic-y += atomic.h generic-y += auxvec.h +generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += cputime.h diff --git a/arch/c6x/include/asm/barrier.h b/arch/c6x/include/asm/barrier.h deleted file mode 100644 index 538240e85909..000000000000 --- a/arch/c6x/include/asm/barrier.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Port on Texas Instruments TMS320C6x architecture - * - * Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated - * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef _ASM_C6X_BARRIER_H -#define _ASM_C6X_BARRIER_H - -#define nop() asm("NOP\n"); - -#define mb() barrier() -#define rmb() barrier() -#define wmb() barrier() -#define set_mb(var, value) do { var = value; mb(); } while (0) -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) - -#endif /* _ASM_C6X_BARRIER_H */ From e70cf54073acb6494620620af2ab993d57ae8d3f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 24 Sep 2012 14:57:58 -0400 Subject: [PATCH 35/91] tile: gxio iorpc numbering change for TRIO interface An ABI numbering change was made in the hypervisor for Tilera's 4.1 MDE release (just shipped). It's incompatible with the previous 4.0 release ABI numbering, so we track the new numbering going forward. We plan to avoid modifying ABI numbering for these interfaces again. Signed-off-by: Chris Metcalf --- arch/tile/include/gxio/iorpc_trio.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h index 15fb77992083..58105c31228b 100644 --- a/arch/tile/include/gxio/iorpc_trio.h +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -25,21 +25,23 @@ #include #include -#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_DEALLOC_ASID IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1401) -#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1402) +#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404) -#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e) -#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140f) +#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412) -#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1417) -#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1418) -#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1419) -#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x141a) +#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414) -#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141c) -#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141d) -#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141f) +#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1420) +#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1421) + +#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1423) +#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1424) +#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1425) #define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) From 212ea99a85d30dbc834888384c57ad5abbc67a0a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 23 Sep 2012 16:58:49 +0000 Subject: [PATCH 36/91] phy/micrel: Implement support for KSZ8021 The KSZ8021 PHY was previously caught by KS8051, which is not correct. This PHY needs additional setup if it is strapped for address 0. In such case an reserved bit must be written in the 0x16, "Operation Mode Strap Override" register. According to the KS8051 datasheet, that bit means "PHY Address 0 in non-broadcast" and it indeed behaves as such on KSZ8021. The issue where the ethernet controller (Freescale FEC) did not communicate with network is fixed by writing this bit as 1. Signed-off-by: Marek Vasut Cc: David J. Choi Cc: David S. Miller Cc: Nobuhiro Iwamatsu Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 27 +++++++++++++++++++++++++++ include/linux/micrel_phy.h | 1 + 2 files changed, 28 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index cf287e0eb408..e8e00dc087f1 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -21,6 +21,12 @@ #include #include +/* Operation Mode Strap Override */ +#define MII_KSZPHY_OMSO 0x16 +#define KSZPHY_OMSO_B_CAST_OFF (1 << 9) +#define KSZPHY_OMSO_RMII_OVERRIDE (1 << 1) +#define KSZPHY_OMSO_MII_OVERRIDE (1 << 0) + /* general Interrupt control/status reg in vendor specific block. */ #define MII_KSZPHY_INTCS 0x1B #define KSZPHY_INTCS_JABBER (1 << 15) @@ -101,6 +107,13 @@ static int kszphy_config_init(struct phy_device *phydev) return 0; } +static int ksz8021_config_init(struct phy_device *phydev) +{ + const u16 val = KSZPHY_OMSO_B_CAST_OFF | KSZPHY_OMSO_RMII_OVERRIDE; + phy_write(phydev, MII_KSZPHY_OMSO, val); + return 0; +} + static int ks8051_config_init(struct phy_device *phydev) { int regval; @@ -127,6 +140,19 @@ static struct phy_driver ksphy_driver[] = { .ack_interrupt = kszphy_ack_interrupt, .config_intr = ks8737_config_intr, .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_KSZ8021, + .phy_id_mask = 0x00ffffff, + .name = "Micrel KSZ8021", + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = ksz8021_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KS8041, .phy_id_mask = 0x00fffff0, @@ -203,6 +229,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ9021, 0x000ffffe }, { PHY_ID_KS8001, 0x00ffffff }, { PHY_ID_KS8737, 0x00fffff0 }, + { PHY_ID_KSZ8021, 0x00ffffff }, { PHY_ID_KS8041, 0x00fffff0 }, { PHY_ID_KS8051, 0x00fffff0 }, { } diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 61f0905bdc48..be7f366e37f0 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -5,6 +5,7 @@ #define PHY_ID_KSZ9021 0x00221610 #define PHY_ID_KS8737 0x00221720 +#define PHY_ID_KSZ8021 0x00221555 #define PHY_ID_KS8041 0x00221510 #define PHY_ID_KS8051 0x00221550 /* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */ From 510d573fefed904a136b220b259bf48d99d5e381 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 23 Sep 2012 16:58:50 +0000 Subject: [PATCH 37/91] phy/micrel: Rename KS80xx to KSZ80xx There is no such part as KS8001, KS8041 or KS8051. There are only KSZ8001, KSZ8041 and KSZ8051. Rename these parts as such to match the Micrel naming. Signed-off-by: Marek Vasut Cc: David J. Choi Cc: David S. Miller Cc: Nobuhiro Iwamatsu Cc: Linux ARM kernel Cc: Fabio Estevam Cc: Shawn Guo Signed-off-by: David S. Miller --- arch/arm/mach-mxs/mach-mxs.c | 2 +- drivers/net/phy/micrel.c | 18 +++++++++--------- include/linux/micrel_phy.h | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 8dabfe81d07c..ff886e01a0b0 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -261,7 +261,7 @@ static void __init apx4devkit_init(void) enable_clk_enet_out(); if (IS_BUILTIN(CONFIG_PHYLIB)) - phy_register_fixup_for_uid(PHY_ID_KS8051, MICREL_PHY_ID_MASK, + phy_register_fixup_for_uid(PHY_ID_KSZ8051, MICREL_PHY_ID_MASK, apx4devkit_phy_fixup); mxsfb_pdata.mode_list = apx4devkit_video_modes; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e8e00dc087f1..2165d5fdb8c0 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -154,9 +154,9 @@ static struct phy_driver ksphy_driver[] = { .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, }, { - .phy_id = PHY_ID_KS8041, + .phy_id = PHY_ID_KSZ8041, .phy_id_mask = 0x00fffff0, - .name = "Micrel KS8041", + .name = "Micrel KSZ8041", .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, @@ -167,9 +167,9 @@ static struct phy_driver ksphy_driver[] = { .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, }, { - .phy_id = PHY_ID_KS8051, + .phy_id = PHY_ID_KSZ8051, .phy_id_mask = 0x00fffff0, - .name = "Micrel KS8051", + .name = "Micrel KSZ8051", .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, @@ -180,8 +180,8 @@ static struct phy_driver ksphy_driver[] = { .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, }, { - .phy_id = PHY_ID_KS8001, - .name = "Micrel KS8001 or KS8721", + .phy_id = PHY_ID_KSZ8001, + .name = "Micrel KSZ8001 or KS8721", .phy_id_mask = 0x00ffffff, .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, @@ -227,11 +227,11 @@ MODULE_LICENSE("GPL"); static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ9021, 0x000ffffe }, - { PHY_ID_KS8001, 0x00ffffff }, + { PHY_ID_KSZ8001, 0x00ffffff }, { PHY_ID_KS8737, 0x00fffff0 }, { PHY_ID_KSZ8021, 0x00ffffff }, - { PHY_ID_KS8041, 0x00fffff0 }, - { PHY_ID_KS8051, 0x00fffff0 }, + { PHY_ID_KSZ8041, 0x00fffff0 }, + { PHY_ID_KSZ8051, 0x00fffff0 }, { } }; diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index be7f366e37f0..d02355d1fc31 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -6,10 +6,10 @@ #define PHY_ID_KSZ9021 0x00221610 #define PHY_ID_KS8737 0x00221720 #define PHY_ID_KSZ8021 0x00221555 -#define PHY_ID_KS8041 0x00221510 -#define PHY_ID_KS8051 0x00221550 +#define PHY_ID_KSZ8041 0x00221510 +#define PHY_ID_KSZ8051 0x00221550 /* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */ -#define PHY_ID_KS8001 0x0022161A +#define PHY_ID_KSZ8001 0x0022161A /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 From 69190e67d4fac49e2f6ea22f46780d0c712214b8 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 23 Sep 2012 16:58:51 +0000 Subject: [PATCH 38/91] phy/micrel: Add missing header to micrel_phy.h The license header was missing in micrel_phy.h . This patch adds one. Signed-off-by: Marek Vasut Cc: David J. Choi Cc: David S. Miller Cc: Nobuhiro Iwamatsu Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index d02355d1fc31..de201203bc7c 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -1,3 +1,15 @@ +/* + * include/linux/micrel_phy.h + * + * Micrel PHY IDs + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + #ifndef _MICREL_PHY_H #define _MICREL_PHY_H From b1268d3737c6316016026245eef276eda6b0a621 Mon Sep 17 00:00:00 2001 From: Roland Stigge Date: Thu, 20 Sep 2012 10:48:03 +0200 Subject: [PATCH 39/91] gpio-lpc32xx: Fix value handling of gpio_direction_output() For GPIOs of gpio-lpc32xx, gpio_direction_output() ignores the value argument (initial value of output). This patch fixes this by setting the level accordingly. Cc: stable@kernel.org Signed-off-by: Roland Stigge Acked-by: Alexandre Pereira da Silva Signed-off-by: Linus Walleij --- drivers/gpio/gpio-lpc32xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c index 8a420f13905e..ed94b4ea72e9 100644 --- a/drivers/gpio/gpio-lpc32xx.c +++ b/drivers/gpio/gpio-lpc32xx.c @@ -308,6 +308,7 @@ static int lpc32xx_gpio_dir_output_p012(struct gpio_chip *chip, unsigned pin, { struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); + __set_gpio_level_p012(group, pin, value); __set_gpio_dir_p012(group, pin, 0); return 0; @@ -318,6 +319,7 @@ static int lpc32xx_gpio_dir_output_p3(struct gpio_chip *chip, unsigned pin, { struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); + __set_gpio_level_p3(group, pin, value); __set_gpio_dir_p3(group, pin, 0); return 0; @@ -326,6 +328,9 @@ static int lpc32xx_gpio_dir_output_p3(struct gpio_chip *chip, unsigned pin, static int lpc32xx_gpio_dir_out_always(struct gpio_chip *chip, unsigned pin, int value) { + struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); + + __set_gpo_level_p3(group, pin, value); return 0; } From 3e10986d1d698140747fcfc2761ec9cb64c1d582 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Sep 2012 07:00:11 +0000 Subject: [PATCH 40/91] net: guard tcp_set_keepalive() to tcp sockets Its possible to use RAW sockets to get a crash in tcp_set_keepalive() / sk_reset_timer() Fix is to make sure socket is a SOCK_STREAM one. Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 305792076121..a6000fbad294 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -691,7 +691,8 @@ set_rcvbuf: case SO_KEEPALIVE: #ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) tcp_set_keepalive(sk, valbool); #endif sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); From 16d74ebeb1b056dfc80e69b44cc3a26515db1925 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 25 Sep 2012 11:51:05 +0900 Subject: [PATCH 41/91] sh: pfc: Fix up GPIO mux type reconfig case. Some drivers need to switch pin states between GPIO and pin function at runtime, which was inadvertently broken in the pinctrl driver for GPIOs being bound to a specific direction. This fixes up the request path to ensure that previously configured GPIOs don't cause us to inadvertently error out with an unsupported mux on reconfig, which in practice is primarily aimed at trapping pull-up/down users that have yet to be implemented under the new API. Fixes up regressions in the TPU PWM driver, amongst others. Reported-by: Laurent Pinchart Tested-by: Laurent Pinchart Signed-off-by: Paul Mundt --- drivers/sh/pfc/pinctrl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/sh/pfc/pinctrl.c b/drivers/sh/pfc/pinctrl.c index a3ac39b79192..0646bf6e7889 100644 --- a/drivers/sh/pfc/pinctrl.c +++ b/drivers/sh/pfc/pinctrl.c @@ -208,6 +208,8 @@ static int sh_pfc_gpio_request_enable(struct pinctrl_dev *pctldev, break; case PINMUX_TYPE_GPIO: + case PINMUX_TYPE_INPUT: + case PINMUX_TYPE_OUTPUT: break; default: pr_err("Unsupported mux type (%d), bailing...\n", pinmux_type); From bef83de5a0e3031ff6ff9584b458611a7ac01b85 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 24 Sep 2012 21:23:25 -0600 Subject: [PATCH 42/91] iommu: static inline iommu group stub functions Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7e83370e6fd2..f3b99e1c1042 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -256,72 +256,78 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain, { } -int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) +static inline int iommu_attach_group(struct iommu_domain *domain, + struct iommu_group *group) { return -ENODEV; } -void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) +static inline void iommu_detach_group(struct iommu_domain *domain, + struct iommu_group *group) { } -struct iommu_group *iommu_group_alloc(void) +static inline struct iommu_group *iommu_group_alloc(void) { return ERR_PTR(-ENODEV); } -void *iommu_group_get_iommudata(struct iommu_group *group) +static inline void *iommu_group_get_iommudata(struct iommu_group *group) { return NULL; } -void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, - void (*release)(void *iommu_data)) +static inline void iommu_group_set_iommudata(struct iommu_group *group, + void *iommu_data, + void (*release)(void *iommu_data)) { } -int iommu_group_set_name(struct iommu_group *group, const char *name) +static inline int iommu_group_set_name(struct iommu_group *group, + const char *name) { return -ENODEV; } -int iommu_group_add_device(struct iommu_group *group, struct device *dev) +static inline int iommu_group_add_device(struct iommu_group *group, + struct device *dev) { return -ENODEV; } -void iommu_group_remove_device(struct device *dev) +static inline void iommu_group_remove_device(struct device *dev) { } -int iommu_group_for_each_dev(struct iommu_group *group, void *data, - int (*fn)(struct device *, void *)) +static inline int iommu_group_for_each_dev(struct iommu_group *group, + void *data, + int (*fn)(struct device *, void *)) { return -ENODEV; } -struct iommu_group *iommu_group_get(struct device *dev) +static inline struct iommu_group *iommu_group_get(struct device *dev) { return NULL; } -void iommu_group_put(struct iommu_group *group) +static inline void iommu_group_put(struct iommu_group *group) { } -int iommu_group_register_notifier(struct iommu_group *group, - struct notifier_block *nb) +static inline int iommu_group_register_notifier(struct iommu_group *group, + struct notifier_block *nb) { return -ENODEV; } -int iommu_group_unregister_notifier(struct iommu_group *group, - struct notifier_block *nb) +static inline int iommu_group_unregister_notifier(struct iommu_group *group, + struct notifier_block *nb) { return 0; } -int iommu_group_id(struct iommu_group *group) +static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; } From a140b98dcd9843f2e21091f2b64f10905150be16 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 24 Sep 2012 17:17:30 -0700 Subject: [PATCH 43/91] pwm-backlight: take over maintenance Since the pwm-backlight driver is lacking a proper maintainer and is the heaviest user of the PWM framework I'm taking over maintenance. Signed-off-by: Thierry Reding Acked-by: Arun Murthy Cc: Matthew Garrett Cc: Robert Morell Cc: Dilan Lee Cc: Axel Lin Cc: Mark Brown Cc: Alexandre Courbot Acked-by: Sachin Kamat Acked-by: Linus Walleij Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b17587d9412f..1796fa3a1bc1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5544,6 +5544,8 @@ F: Documentation/devicetree/bindings/pwm/ F: include/linux/pwm.h F: include/linux/of_pwm.h F: drivers/pwm/ +F: drivers/video/backlight/pwm_bl.c +F: include/linux/pwm_backlight.h PXA2xx/PXA3xx SUPPORT M: Eric Miao From 0e75898fe2499ece0ac3f1475982a828b3283b0d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Sep 2012 17:17:33 -0700 Subject: [PATCH 44/91] checksyscalls: fix "here document" handling "echo" doesn't read from stdin, therefore the checksyscalls script didn't warn about not implemented system calls anymore since 29dc54c6 ("checksyscalls: Use arch/x86/syscalls/syscall_32.tbl as source"). Use "cat" instead of "echo" which handles this correctly. Signed-off-by: Heiko Carstens Cc: Michal Marek Cc: H. Peter Anvin Cc: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checksyscalls.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index d24810fc6af6..fd8fa9aa7c4e 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -200,7 +200,7 @@ EOF syscall_list() { grep '^[0-9]' "$1" | sort -n | ( while read nr abi name entry ; do - echo < Date: Mon, 24 Sep 2012 17:17:35 -0700 Subject: [PATCH 45/91] lib/flex_proportions.c: fix corruption of denominator in flexible proportions When racing with CPU hotplug, percpu_counter_sum() can return negative values for the number of observed events. This confuses fprop_new_period(), which uses unsigned type and as a result number of events is set to big *positive* number. From that moment on, things go pear shaped and can result e.g. in division by zero as denominator is later truncated to 32-bits. This bug causes a divide-by-zero oops in bdi_dirty_limit() in Borislav's 3.6.0-rc6 based kernel. Fix the issue by using a signed type in fprop_new_period(). That makes us bail out from the function without doing anything (mistakenly) thinking there are no events to age. That makes aging somewhat inaccurate but getting accurate data would be rather hard. Signed-off-by: Jan Kara Reported-by: Borislav Petkov Reported-by: Srivatsa S. Bhat Cc: Wu Fengguang Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/flex_proportions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index c785554f9523..ebf3bac460b0 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -62,7 +62,7 @@ void fprop_global_destroy(struct fprop_global *p) */ bool fprop_new_period(struct fprop_global *p, int periods) { - u64 events; + s64 events; unsigned long flags; local_irq_save(flags); From be8cfc4af15cf611dfeb66a1fb5df43d5f1e280a Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 24 Sep 2012 17:17:38 -0700 Subject: [PATCH 46/91] c/r: prctl: fix build error for no-MMU case Commit 1ad75b9e1628 ("c/r: prctl: add minimal address test to PR_SET_MM") added some address checking to prctl_set_mm() used by checkpoint-restore. This causes a build error for no-MMU systems: kernel/sys.c: In function 'prctl_set_mm': kernel/sys.c:1868:34: error: 'mmap_min_addr' undeclared (first use in this function) The test for mmap_min_addr doesn't make a lot of sense for no-MMU code as noted in commit 6e1415467614 ("NOMMU: Optimise away the {dac_,}mmap_min_addr tests"). This patch defines mmap_min_addr as 0UL in the no-MMU case so that the compiler will optimize away tests for "addr < mmap_min_addr". Signed-off-by: Mark Salter Reviewed-by: Cyrill Gorcunov Cc: [3.6.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/security.h b/include/linux/security.h index 3dea6a9d568f..d143b8e01954 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -118,6 +118,7 @@ void reset_security_ops(void); extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; #else +#define mmap_min_addr 0UL #define dac_mmap_min_addr 0UL #endif From 1b05c4b50edbddbdde715c4a7350629819f6655e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 07:03:40 +0000 Subject: [PATCH 47/91] ipv6: raw: fix icmpv6_filter() icmpv6_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Also, if icmpv6 header cannot be found, do not deliver the packet, as we do in IPv4. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/raw.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ef0579d5bca6..4a5f78b50495 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -107,21 +107,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *icmph; - struct raw6_sock *rp = raw6_sk(sk); + struct icmp6hdr *_hdr; + const struct icmp6hdr *hdr; - if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &rp->filter.data[0]; - int bit_nr; + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (hdr) { + const __u32 *data = &raw6_sk(sk)->filter.data[0]; + unsigned int type = hdr->icmp6_type; - icmph = (struct icmp6hdr *) skb->data; - bit_nr = icmph->icmp6_type; - - return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; + return (data[type >> 5] & (1U << (type & 31))) != 0; } - return 0; + return 1; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) From 96af69ea2a83d292238bdba20e4508ee967cf8cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 22:01:28 +0200 Subject: [PATCH 48/91] ipv6: mip6: fix mip6_mh_filter() mip6_mh_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mip6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 5b087c31d87b..0f9bdc5ee9f3 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -86,28 +86,30 @@ static int mip6_mh_len(int type) static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { - struct ip6_mh *mh; + struct ip6_mh _hdr; + const struct ip6_mh *mh; - if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || - !pskb_may_pull(skb, (skb_transport_offset(skb) + - ((skb_transport_header(skb)[1] + 1) << 3)))) + mh = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!mh) return -1; - mh = (struct ip6_mh *)skb_transport_header(skb); + if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) + return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); - mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + + skb_network_header_len(skb)); return -1; } From 82e6bfe2fbc4d48852114c4f979137cd5bf1d1a8 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 21 Sep 2012 22:26:52 +0000 Subject: [PATCH 49/91] netfilter: xt_limit: have r->cost != 0 case work Commit v2.6.19-rc1~1272^2~41 tells us that r->cost != 0 can happen when a running state is saved to userspace and then reinstated from there. Make sure that private xt_limit area is initialized with correct values. Otherwise, random matchings due to use of uninitialized memory. Signed-off-by: Jan Engelhardt Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_limit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 5c22ce8ab309..a4c1e4528cac 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -117,11 +117,11 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* For SMP, we only want to use one set of state. */ r->master = priv; + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ if (r->cost == 0) { - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - priv->prev = jiffies; - priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } From c7ead11d0b498984169871fd03c57441862ec3f3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 26 Sep 2012 12:41:19 +1000 Subject: [PATCH 50/91] drm/nouveau: silence a debug message triggered by newer userspace Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index ff23d88880e5..3ca240b4413d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -179,7 +179,7 @@ nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS) return 0; } else if (init->class == 0x906e) { - NV_ERROR(dev, "906e not supported yet\n"); + NV_DEBUG(dev, "906e not supported yet\n"); return -EINVAL; } From 79eee7aa0d4de5a42331c63d3c7c735248b53d0c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 26 Sep 2012 12:43:10 +1000 Subject: [PATCH 51/91] drm/nvc0/ltcg: mask off intr 0x10 NVIDIA do that at startup too on Fermi, so perhaps the heap of 0x10 intrs we receive are normal and we can ignore them. On Kepler NVIDIA *don't* do this, but the hardware appears to come up with the bit masked off by default - so that's probably why :) This should silence some interrupt spam seen on Fermi+ boards. Backported patch from reworked nouveau kernel tree. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvc0_fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nvc0_fb.c b/drivers/gpu/drm/nouveau/nvc0_fb.c index f704e942372e..f376c39310df 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fb.c +++ b/drivers/gpu/drm/nouveau/nvc0_fb.c @@ -124,6 +124,7 @@ nvc0_fb_init(struct drm_device *dev) priv = dev_priv->engine.fb.priv; nv_wr32(dev, 0x100c10, priv->r100c10 >> 8); + nv_mask(dev, 0x17e820, 0x00100000, 0x00000000); /* NV_PLTCG_INTR_EN */ return 0; } From 68c4fce737c4b963e336435f225621dc21138397 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 23 Sep 2012 19:33:55 +0300 Subject: [PATCH 52/91] vmwgfx: corruption in vmw_event_fence_action_create() We don't allocate enough data for this struct. As soon as we start modifying event->event on the next lines, then we're going beyond the end of the memory we allocated. Signed-off-by: Dan Carpenter cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index f2fb8f15e2f1..7e0743358dff 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1018,7 +1018,7 @@ int vmw_event_fence_action_create(struct drm_file *file_priv, } - event = kzalloc(sizeof(event->event), GFP_KERNEL); + event = kzalloc(sizeof(*event), GFP_KERNEL); if (unlikely(event == NULL)) { DRM_ERROR("Failed to allocate an event.\n"); ret = -ENOMEM; From 3a75885848996baab5276ff37ebf7295c3c753f0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 25 Sep 2012 16:17:43 +1000 Subject: [PATCH 53/91] drm/udl: limit modes to the sku pixel limits. Otherwise when X starts we commonly get a black screen scanning out nothing, its wierd dpms on/off from userspace brings it back, With this on F18, multi-seat works again with my 1920x1200 monitor which is above the sku limit for the device I have. Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/udl/udl_connector.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index ba055e9ca007..8d9dc44f1f94 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -69,6 +69,13 @@ static int udl_get_modes(struct drm_connector *connector) static int udl_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct udl_device *udl = connector->dev->dev_private; + if (!udl->sku_pixel_limit) + return 0; + + if (mode->vdisplay * mode->hdisplay > udl->sku_pixel_limit) + return MODE_VIRTUAL_Y; + return 0; } From d0e12f3ff3472cbd8f52d3c0e6ee07a841787c40 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 26 Sep 2012 11:57:30 +0100 Subject: [PATCH 54/91] ASoC: wm2000: Correct register size Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm2000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 3fd5b29dc933..a3acb7a85f6a 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -702,7 +702,7 @@ static bool wm2000_readable_reg(struct device *dev, unsigned int reg) } static const struct regmap_config wm2000_regmap = { - .reg_bits = 8, + .reg_bits = 16, .val_bits = 8, .max_register = WM2000_REG_IF_CTL, From 11ef4cfac935ab45eb4c7f98d26c78ee69627909 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 24 Sep 2012 15:19:26 -0400 Subject: [PATCH 55/91] syscalls: add __NR_kcmp syscall to generic unistd.h Commit d97b46a64 ("syscalls, x86: add __NR_kcmp syscall" ) added a new syscall to support checkpoint restore. It is currently x86-only, but that restriction will be removed in a subsequent patch. Unfortunately, the kernel checksyscalls script had a bug which suppressed any warning to other architectures that the kcmp syscall was not implemented. A patch to checksyscalls is being tested in linux-next and other architectures are seeing warnings about kcmp being unimplemented. This patch adds __NR_kcmp to so that kcmp is wired in for architectures using the generic syscall list. Signed-off-by: Mark Salter Acked-by: Arnd Bergmann --- include/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 991ef01cd77e..3748ec92dcbc 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -691,9 +691,11 @@ __SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \ #define __NR_process_vm_writev 271 __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ compat_sys_process_vm_writev) +#define __NR_kcmp 272 +__SYSCALL(__NR_kcmp, sys_kcmp) #undef __NR_syscalls -#define __NR_syscalls 272 +#define __NR_syscalls 273 /* * All syscalls below here should go away really, From 7106891a5c5960782801366ac49e47aa996b662f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 25 Sep 2012 15:24:56 -0700 Subject: [PATCH 56/91] MAINTAINERS: update Intel C600 SAS driver maintainers Cc: Lukasz Dorau Cc: Maciej Patelczyk Signed-off-by: Dave Jiang Signed-off-by: Linus Torvalds --- MAINTAINERS | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1796fa3a1bc1..9a6c4da3b2ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3552,11 +3552,12 @@ K: \b(ABS|SYN)_MT_ INTEL C600 SERIES SAS CONTROLLER DRIVER M: Intel SCU Linux support +M: Lukasz Dorau +M: Maciej Patelczyk M: Dave Jiang -M: Ed Nadolski L: linux-scsi@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git -S: Maintained +T: git git://git.code.sf.net/p/intel-sas/isci +S: Supported F: drivers/scsi/isci/ F: firmware/isci/ From 307615a26e95406c42c95916a66ba50434567e0f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:39 +0100 Subject: [PATCH 57/91] dm thin: do not set discard_zeroes_data The dm thin pool target claims to support the zeroing of discarded data areas. This turns out to be incorrect when processing discards that do not exactly cover a complete number of blocks, so the target must always set discard_zeroes_data_unsupported. The thin pool target will zero blocks when they are allocated if the skip_block_zeroing feature is not specified. The block layer may send a discard that only partly covers a block. If a thin pool block is partially discarded then there is no guarantee that the discarded data will get zeroed before it is accessed again. Due to this, thin devices cannot claim discards will always zero data. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index af1fc3b2c2ad..d4209231069d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2274,6 +2274,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * thin devices' discard limits consistent). */ ti->discards_supported = true; + ti->discard_zeroes_data_unsupported = true; } ti->private = pt; @@ -2745,7 +2746,6 @@ static void set_discard_limits(struct pool *pool, struct queue_limits *limits) * boundary is not sent to this target. */ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - limits->discard_zeroes_data = pool->pf.zero_new_blocks; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) From 7ba10aa6fbac7158a50bec142132b04bc480bb29 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:41 +0100 Subject: [PATCH 58/91] dm mpath: only retry ioctl when no paths if queue_if_no_path set When there are no paths and multipath receives an ioctl, it waits until a path becomes available. This behaviour is incorrect if the "queue_if_no_path" setting was not specified, as then the ioctl should be rejected immediately, which this patch now does. commit 35991652b ("dm mpath: allow ioctls to trigger pg init") should have checked if queue_if_no_path was configured before queueing IO. Checking for the queue_if_no_path feature, like is done in map_io(), allows the following table load to work without blocking in the multipath_ioctl retry loop: echo "0 1024 multipath 0 0 0 0" | dmsetup create mpath_nodevs Without this fix the multipath_ioctl will block with the following stack trace: blkid D 0000000000000002 0 23936 1 0x00000000 ffff8802b89e5cd8 0000000000000082 ffff8802b89e5fd8 0000000000012440 ffff8802b89e4010 0000000000012440 0000000000012440 0000000000012440 ffff8802b89e5fd8 0000000000012440 ffff88030c2aab30 ffff880325794040 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x182/0x2e0 [] ? lock_timer_base+0x70/0x70 [] schedule_timeout_uninterruptible+0x1e/0x20 [] msleep+0x20/0x30 [] multipath_ioctl+0x109/0x170 [dm_multipath] [] dm_blk_ioctl+0xbc/0xd0 [dm_mod] [] __blkdev_driver_ioctl+0x28/0x30 [] blkdev_ioctl+0xce/0x730 [] block_ioctl+0x3c/0x40 [] do_vfs_ioctl+0x8c/0x340 [] ? sys_newfstat+0x33/0x40 [] sys_ioctl+0xa1/0xb0 [] system_call_fastpath+0x16/0x1b Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.5+ Acked-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d8abb90a6c2f..034233eefc82 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) { struct multipath *m = ti->private; + struct pgpath *pgpath; struct block_device *bdev; fmode_t mode; unsigned long flags; @@ -1570,12 +1571,14 @@ again: if (!m->current_pgpath) __choose_pgpath(m, 0); - if (m->current_pgpath) { - bdev = m->current_pgpath->path.dev->bdev; - mode = m->current_pgpath->path.dev->mode; + pgpath = m->current_pgpath; + + if (pgpath) { + bdev = pgpath->path.dev->bdev; + mode = pgpath->path.dev->mode; } - if (m->queue_io) + if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) r = -EAGAIN; else if (!bdev) r = -EIO; From ba1cbad93dd47223b1f3b8edd50dd9ef2abcb2ed Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:42 +0100 Subject: [PATCH 59/91] dm: handle requests beyond end of device instead of using BUG_ON The access beyond the end of device BUG_ON that was introduced to dm_request_fn via commit 29e4013de7ad950280e4b2208 ("dm: implement REQ_FLUSH/FUA support for request-based dm") was an overly drastic (but simple) response to this situation. I have received a report that this BUG_ON was hit and now think it would be better to use dm_kill_unmapped_request() to fail the clone and original request with -EIO. map_request() will assign the valid target returned by dm_table_find_target to tio->ti. But when the target isn't valid tio->ti is never assigned (because map_request isn't called); so add a check for tio->ti != NULL to dm_done(). Reported-by: Mike Christie Signed-off-by: Mike Snitzer Signed-off-by: Jun'ichi Nomura Cc: stable@vger.kernel.org # v2.6.37+ Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 56 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e09b6ff5b49..6748e0c4df1f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped) { int r = error; struct dm_rq_target_io *tio = clone->end_io_data; - dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; + dm_request_endio_fn rq_end_io = NULL; - if (mapped && rq_end_io) - r = rq_end_io(tio->ti, clone, error, &tio->info); + if (tio->ti) { + rq_end_io = tio->ti->type->rq_end_io; + + if (mapped && rq_end_io) + r = rq_end_io(tio->ti, clone, error, &tio->info); + } if (r <= 0) /* The target wants to complete the I/O */ @@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone, int r, requeued = 0; struct dm_rq_target_io *tio = clone->end_io_data; - /* - * Hold the md reference here for the in-flight I/O. - * We can't rely on the reference count by device opener, - * because the device may be closed during the request completion - * when all bios are completed. - * See the comment in rq_completed() too. - */ - dm_get(md); - tio->ti = ti; r = ti->type->map_rq(ti, clone, &tio->info); switch (r) { @@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone, return requeued; } +static struct request *dm_start_request(struct mapped_device *md, struct request *orig) +{ + struct request *clone; + + blk_start_request(orig); + clone = orig->special; + atomic_inc(&md->pending[rq_data_dir(clone)]); + + /* + * Hold the md reference here for the in-flight I/O. + * We can't rely on the reference count by device opener, + * because the device may be closed during the request completion + * when all bios are completed. + * See the comment in rq_completed() too. + */ + dm_get(md); + + return clone; +} + /* * q->request_fn for request-based dm. * Called with the queue lock held. @@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q) pos = blk_rq_pos(rq); ti = dm_table_find_target(map, pos); - BUG_ON(!dm_target_is_valid(ti)); + if (!dm_target_is_valid(ti)) { + /* + * Must perform setup, that dm_done() requires, + * before calling dm_kill_unmapped_request + */ + DMERR_LIMIT("request attempted access beyond the end of device"); + clone = dm_start_request(md, rq); + dm_kill_unmapped_request(clone, -EIO); + continue; + } if (ti->type->busy && ti->type->busy(ti)) goto delay_and_out; - blk_start_request(rq); - clone = rq->special; - atomic_inc(&md->pending[rq_data_dir(clone)]); + clone = dm_start_request(md, rq); spin_unlock(q->queue_lock); if (map_request(ti, clone, md)) @@ -1684,8 +1706,6 @@ delay_and_out: blk_delay_queue(q, HZ / 10); out: dm_table_put(map); - - return; } int dm_underlying_device_busy(struct request_queue *q) From c3c4555edd10dbc0b388a0125b9c50de5e79af05 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 26 Sep 2012 23:45:43 +0100 Subject: [PATCH 60/91] dm table: clear add_random unless all devices have it set Always clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not have it set. Otherwise devices with predictable characteristics may contribute entropy. QUEUE_FLAG_ADD_RANDOM specifies whether or not queue IO timings contribute to the random pool. For bio-based targets this flag is always 0 because such devices have no real queue. For request-based devices this flag was always set to 1 by default. Now set it according to the flags on underlying devices. If there is at least one device which should not contribute, set the flag to zero: If a device, such as fast SSD storage, is not suitable for supplying entropy, a request-based queue stacked over it will not be either. Because the checking logic is exactly same as for the rotational flag, share the iteration function with device_is_nonrot(). Signed-off-by: Milan Broz Cc: stable@vger.kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f90069029aae..77b90ae66991 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1354,17 +1354,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, return q && blk_queue_nonrot(q); } -static bool dm_table_is_nonrot(struct dm_table *t) +static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !blk_queue_add_random(q); +} + +static bool dm_table_all_devices_attribute(struct dm_table *t, + iterate_devices_callout_fn func) { struct dm_target *ti; unsigned i = 0; - /* Ensure that all underlying device are non-rotational. */ while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_is_nonrot, NULL)) + !ti->type->iterate_devices(ti, func, NULL)) return 0; } @@ -1396,13 +1404,23 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_discard_zeroes_data(t)) q->limits.discard_zeroes_data = 0; - if (dm_table_is_nonrot(t)) + /* Ensure that all underlying devices are non-rotational. */ + if (dm_table_all_devices_attribute(t, device_is_nonrot)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); dm_table_set_integrity(t); + /* + * Determine whether or not this queue's I/O timings contribute + * to the entropy pool, Only request-based targets use this. + * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not + * have it set. + */ + if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + /* * QUEUE_FLAG_STACKABLE must be set after all queue settings are * visible to other CPUs because, once the flag is set, incoming bios From 3ae706561637331aa578e52bb89ecbba5edcb7a9 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:45 +0100 Subject: [PATCH 61/91] dm: retain table limits when swapping to new table with no devices Add a safety net that will re-use the DM device's existing limits in the event that DM device has a temporary table that doesn't have any component devices. This is to reduce the chance that requests not respecting the hardware limits will reach the device. DM recalculates queue limits based only on devices which currently exist in the table. This creates a problem in the event all devices are temporarily removed such as all paths being lost in multipath. DM will reset the limits to the maximum permissible, which can then assemble requests which exceed the limits of the paths when the paths are restored. The request will fail the blk_rq_check_limits() test when sent to a path with lower limits, and will be retried without end by multipath. This became a much bigger issue after v3.6 commit fe86cdcef ("block: do not artificially constrain max_sectors for stacking drivers"). Reported-by: David Jeffery Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 35 +++++++++++++++++++++++++++++++++++ drivers/md/dm.c | 15 ++++++++++++++- drivers/md/dm.h | 1 + 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 77b90ae66991..100368eb7991 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) return &t->targets[(KEYS_PER_NODE * n) + k]; } +static int count_device(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + unsigned *num_devices = data; + + (*num_devices)++; + + return 0; +} + +/* + * Check whether a table has no data devices attached using each + * target's iterate_devices method. + * Returns false if the result is unknown because a target doesn't + * support iterate_devices. + */ +bool dm_table_has_no_data_devices(struct dm_table *table) +{ + struct dm_target *uninitialized_var(ti); + unsigned i = 0, num_devices = 0; + + while (i < dm_table_get_num_targets(table)) { + ti = dm_table_get_target(table, i++); + + if (!ti->type->iterate_devices) + return false; + + ti->type->iterate_devices(ti, count_device, &num_devices); + if (num_devices) + return false; + } + + return true; +} + /* * Establish the new table's queue_limits and validate them. */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6748e0c4df1f..67ffa391edcf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2429,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md) */ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) { - struct dm_table *map = ERR_PTR(-EINVAL); + struct dm_table *live_map, *map = ERR_PTR(-EINVAL); struct queue_limits limits; int r; @@ -2439,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) if (!dm_suspended_md(md)) goto out; + /* + * If the new table has no data devices, retain the existing limits. + * This helps multipath with queue_if_no_path if all paths disappear, + * then new I/O is queued based on these limits, and then some paths + * reappear. + */ + if (dm_table_has_no_data_devices(table)) { + live_map = dm_get_live_table(md); + if (live_map) + limits = md->queue->limits; + dm_table_put(live_map); + } + r = dm_calculate_queue_limits(table, &limits); if (r) { map = ERR_PTR(r); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 52eef493d266..6a99fefaa743 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -54,6 +54,7 @@ void dm_table_event_callback(struct dm_table *t, void (*fn)(void *), void *context); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); +bool dm_table_has_no_data_devices(struct dm_table *table); int dm_calculate_queue_limits(struct dm_table *table, struct queue_limits *limits); void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, From 9bc142dd755d360c08a91ecb107d218787a2e9db Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:46 +0100 Subject: [PATCH 62/91] dm thin: tidy discard support A little thin discard code refactoring to make the next patch (dm thin: fix discard support for data devices) more readable. Pull out a couple of functions (and uses bools instead of unsigned for features). No functional changes. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 64 +++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index d4209231069d..e99f4134dbd7 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -509,9 +509,9 @@ enum pool_mode { struct pool_features { enum pool_mode mode; - unsigned zero_new_blocks:1; - unsigned discard_enabled:1; - unsigned discard_passdown:1; + bool zero_new_blocks:1; + bool discard_enabled:1; + bool discard_passdown:1; }; struct thin_c; @@ -1839,6 +1839,32 @@ static void __requeue_bios(struct pool *pool) /*---------------------------------------------------------------- * Binding of control targets to a pool object *--------------------------------------------------------------*/ +static bool data_dev_supports_discard(struct pool_c *pt) +{ + struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); + + return q && blk_queue_discard(q); +} + +/* + * If discard_passdown was enabled verify that the data device + * supports discards. Disable discard_passdown if not; otherwise + * -EOPNOTSUPP will be returned. + */ +static void disable_passdown_if_not_supported(struct pool_c *pt, + struct pool_features *pf) +{ + char buf[BDEVNAME_SIZE]; + + if (!pf->discard_passdown || data_dev_supports_discard(pt)) + return; + + DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", + bdevname(pt->data_dev->bdev, buf)); + + pf->discard_passdown = false; +} + static int bind_control_target(struct pool *pool, struct dm_target *ti) { struct pool_c *pt = ti->private; @@ -1855,23 +1881,9 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; pool->pf = pt->pf; - set_pool_mode(pool, new_mode); - /* - * If discard_passdown was enabled verify that the data device - * supports discards. Disable discard_passdown if not; otherwise - * -EOPNOTSUPP will be returned. - */ - /* FIXME: pull this out into a sep fn. */ - if (pt->pf.discard_passdown) { - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - if (!q || !blk_queue_discard(q)) { - char buf[BDEVNAME_SIZE]; - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", - bdevname(pt->data_dev->bdev, buf)); - pool->pf.discard_passdown = 0; - } - } + disable_passdown_if_not_supported(pt, &pool->pf); + set_pool_mode(pool, new_mode); return 0; } @@ -1889,9 +1901,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) static void pool_features_init(struct pool_features *pf) { pf->mode = PM_WRITE; - pf->zero_new_blocks = 1; - pf->discard_enabled = 1; - pf->discard_passdown = 1; + pf->zero_new_blocks = true; + pf->discard_enabled = true; + pf->discard_passdown = true; } static void __pool_destroy(struct pool *pool) @@ -2119,13 +2131,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, argc--; if (!strcasecmp(arg_name, "skip_block_zeroing")) - pf->zero_new_blocks = 0; + pf->zero_new_blocks = false; else if (!strcasecmp(arg_name, "ignore_discard")) - pf->discard_enabled = 0; + pf->discard_enabled = false; else if (!strcasecmp(arg_name, "no_discard_passdown")) - pf->discard_passdown = 0; + pf->discard_passdown = false; else if (!strcasecmp(arg_name, "read_only")) pf->mode = PM_READ_ONLY; @@ -2261,6 +2273,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->pf = pf; ti->num_flush_requests = 1; + /* * Only need to enable discards if the pool should pass * them down to the data device. The thin device's discard @@ -2268,6 +2281,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) */ if (pf.discard_enabled && pf.discard_passdown) { ti->num_discard_requests = 1; + /* * Setting 'discards_supported' circumvents the normal * stacking of discard limits (this keeps the pool and From 0424caa14508f19ca8093d36c15250e0331a3a0a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:47 +0100 Subject: [PATCH 63/91] dm thin: fix discard support for data devices The discard limits that get established for a thin-pool or thin device may be incompatible with the pool's data device. Avoid this by checking the discard limits of the pool's data device. If an incompatibility is found then the pool's 'discard passdown' feature is disabled. Change thin_io_hints to ensure that a thin device always uses the same queue limits as its pool device. Introduce requested_pf to track whether or not the table line originally contained the no_discard_passdown flag and use this directly for table output. We prepare the correct setting for discard_passdown directly in bind_control_target (called from pool_io_hints) and store it in adjusted_pf rather than waiting until we have access to pool->pf in pool_preresume. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 87 +++++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 30 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e99f4134dbd7..c29410af1e22 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -580,7 +580,8 @@ struct pool_c { struct dm_target_callbacks callbacks; dm_block_t low_water_blocks; - struct pool_features pf; + struct pool_features requested_pf; /* Features requested during table load */ + struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ }; /* @@ -1848,21 +1849,36 @@ static bool data_dev_supports_discard(struct pool_c *pt) /* * If discard_passdown was enabled verify that the data device - * supports discards. Disable discard_passdown if not; otherwise - * -EOPNOTSUPP will be returned. + * supports discards. Disable discard_passdown if not. */ -static void disable_passdown_if_not_supported(struct pool_c *pt, - struct pool_features *pf) +static void disable_passdown_if_not_supported(struct pool_c *pt) { + struct pool *pool = pt->pool; + struct block_device *data_bdev = pt->data_dev->bdev; + struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; + sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT; + const char *reason = NULL; char buf[BDEVNAME_SIZE]; - if (!pf->discard_passdown || data_dev_supports_discard(pt)) + if (!pt->adjusted_pf.discard_passdown) return; - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", - bdevname(pt->data_dev->bdev, buf)); + if (!data_dev_supports_discard(pt)) + reason = "discard unsupported"; - pf->discard_passdown = false; + else if (data_limits->max_discard_sectors < pool->sectors_per_block) + reason = "max discard sectors smaller than a block"; + + else if (data_limits->discard_granularity > block_size) + reason = "discard granularity larger than a block"; + + else if (block_size & (data_limits->discard_granularity - 1)) + reason = "discard granularity not a factor of block size"; + + if (reason) { + DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason); + pt->adjusted_pf.discard_passdown = false; + } } static int bind_control_target(struct pool *pool, struct dm_target *ti) @@ -1873,16 +1889,15 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) * We want to make sure that degraded pools are never upgraded. */ enum pool_mode old_mode = pool->pf.mode; - enum pool_mode new_mode = pt->pf.mode; + enum pool_mode new_mode = pt->adjusted_pf.mode; if (old_mode > new_mode) new_mode = old_mode; pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; - pool->pf = pt->pf; + pool->pf = pt->adjusted_pf; - disable_passdown_if_not_supported(pt, &pool->pf); set_pool_mode(pool, new_mode); return 0; @@ -2271,7 +2286,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->metadata_dev = metadata_dev; pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; - pt->pf = pf; + pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_requests = 1; /* @@ -2718,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, (unsigned long long)pt->low_water_blocks); - emit_flags(&pt->pf, result, sz, maxlen); + emit_flags(&pt->requested_pf, result, sz, maxlen); break; } @@ -2747,19 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -static void set_discard_limits(struct pool *pool, struct queue_limits *limits) +static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) { - /* - * FIXME: these limits may be incompatible with the pool's data device - */ + struct pool *pool = pt->pool; + struct queue_limits *data_limits; + limits->max_discard_sectors = pool->sectors_per_block; /* - * This is just a hint, and not enforced. We have to cope with - * bios that cover a block partially. A discard that spans a block - * boundary is not sent to this target. + * discard_granularity is just a hint, and not enforced. */ - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; + if (pt->adjusted_pf.discard_passdown) { + data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; + limits->discard_granularity = data_limits->discard_granularity; + } else + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2769,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, 0); blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - if (pool->pf.discard_enabled) - set_discard_limits(pool, limits); + + /* + * pt->adjusted_pf is a staging area for the actual features to use. + * They get transferred to the live pool in bind_control_target() + * called from pool_preresume(). + */ + if (!pt->adjusted_pf.discard_enabled) + return; + + disable_passdown_if_not_supported(pt); + + set_discard_limits(pt, limits); } static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3056,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti, return 0; } +/* + * A thin device always inherits its queue limits from its pool. + */ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; - struct pool *pool = tc->pool; - blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - set_discard_limits(pool, limits); + *limits = bdev_get_queue(tc->pool_dev->bdev)->limits; } static struct target_type thin_target = { .name = "thin", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, From 1d55f6bcc0331d744cd5b56c4ee79e3809438161 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 26 Sep 2012 23:45:48 +0100 Subject: [PATCH 64/91] dm verity: fix overflow check This patch fixes sector_t overflow checking in dm-verity. Without this patch, the code checks for overflow only if sector_t is smaller than long long, not if sector_t and long long have the same size. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-verity.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 254d19268ad2..892ae2766aa6 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) v->hash_dev_block_bits = ffs(num) - 1; if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) { + (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) { ti->error = "Invalid data blocks"; r = -EINVAL; goto bad; @@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) { + (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) + >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) { ti->error = "Invalid hash start"; r = -EINVAL; goto bad; From 833dd8224edda0bc1cfa1b0fd5cbe7a36fd59db8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 27 Sep 2012 09:13:43 +1000 Subject: [PATCH 65/91] drm/nvc0/fifo: ignore bits in PFIFO_INTR that aren't set in PFIFO_INTR_EN PFIFO_INTR = 0x40000000 appears to be a normal case on nvc0/nve0 PFIFO, the binary driver appears to completely ignore it in its PFIFO interrupt handler and even masks off the bit (as we do) in PFIFO_INTR_EN at init time. The bits still light up in the hardware sometimes though, so lets just ignore any bits we haven't explicitely requested. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvc0_fifo.c | 3 ++- drivers/gpu/drm/nouveau/nve0_fifo.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c index 7d85553d518c..cd39eb99f5b1 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fifo.c +++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c @@ -373,7 +373,8 @@ nvc0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit) static void nvc0_fifo_isr(struct drm_device *dev) { - u32 stat = nv_rd32(dev, 0x002100); + u32 mask = nv_rd32(dev, 0x002140); + u32 stat = nv_rd32(dev, 0x002100) & mask; if (stat & 0x00000100) { NV_INFO(dev, "PFIFO: unknown status 0x00000100\n"); diff --git a/drivers/gpu/drm/nouveau/nve0_fifo.c b/drivers/gpu/drm/nouveau/nve0_fifo.c index e98d144e6eb9..281bece751b6 100644 --- a/drivers/gpu/drm/nouveau/nve0_fifo.c +++ b/drivers/gpu/drm/nouveau/nve0_fifo.c @@ -345,7 +345,8 @@ nve0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit) static void nve0_fifo_isr(struct drm_device *dev) { - u32 stat = nv_rd32(dev, 0x002100); + u32 mask = nv_rd32(dev, 0x002140); + u32 stat = nv_rd32(dev, 0x002100) & mask; if (stat & 0x00000100) { NV_INFO(dev, "PFIFO: unknown status 0x00000100\n"); From 84d5dfbf09be6be71d6dda63d99a8303e85663c7 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 24 Sep 2012 07:54:33 +0200 Subject: [PATCH 66/91] ARM: Orion5x: Fix too small coherent pool. Some Orion5x devices allocate their coherent buffers from atomic context. Increase size of atomic coherent pool to make sure such the allocations won't fail during boot. Signed-off-by: Andrew Lunn Acked-by: Jason Cooper Signed-off-by: Olof Johansson --- arch/arm/mach-orion5x/common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 410291c67666..a6cd14ab1e4e 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -204,6 +204,13 @@ void __init orion5x_wdt_init(void) void __init orion5x_init_early(void) { orion_time_set_base(TIMER_VIRT_BASE); + + /* + * Some Orion5x devices allocate their coherent buffers from atomic + * context. Increase size of atomic coherent pool to make sure such + * the allocations won't fail. + */ + init_dma_coherent_pool_size(SZ_1M); } int orion5x_tclk; From 80b4812407c6b1f66a4f2430e69747a13f010839 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 27 Sep 2012 12:35:21 +1000 Subject: [PATCH 67/91] md/raid10: fix "enough" function for detecting if array is failed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'enough' function is written to work with 'near' arrays only in that is implicitly assumes that the offset from one 'group' of devices to the next is the same as the number of copies. In reality it is the number of 'near' copies. So change it to make this number explicit. This bug makes it possible to run arrays without enough drives present, which is dangerous. It is appropriate for an -stable kernel, but will almost certainly need to be modified for some of them. Cc: stable@vger.kernel.org Reported-by: Jakub Husák Signed-off-by: NeilBrown --- drivers/md/raid10.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1c2eb38f3c51..0138a727c1f3 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1512,14 +1512,16 @@ static int _enough(struct r10conf *conf, struct geom *geo, int ignore) do { int n = conf->copies; int cnt = 0; + int this = first; while (n--) { - if (conf->mirrors[first].rdev && - first != ignore) + if (conf->mirrors[this].rdev && + this != ignore) cnt++; - first = (first+1) % geo->raid_disks; + this = (this+1) % geo->raid_disks; } if (cnt == 0) return 0; + first = (first + geo->near_copies) % geo->raid_disks; } while (first != 0); return 1; } From 424597921ac1fc2a475a656ea5895cbe78f25d0d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Sep 2012 03:24:18 -0400 Subject: [PATCH 68/91] um: take cleaning singlestep to start_thread() ... assuming it's needed to be done at all Signed-off-by: Al Viro --- arch/um/kernel/exec.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 6cade9366364..4cab0c78684b 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -41,32 +41,18 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; + current->ptrace &= ~PT_DTRACE; +#ifdef SUBARCH_EXECVE1 + SUBARCH_EXECVE1(regs->regs); +#endif } EXPORT_SYMBOL(start_thread); -static long execve1(const char *file, - const char __user *const __user *argv, - const char __user *const __user *env) -{ - long error; - - error = do_execve(file, argv, env, ¤t->thread.regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; -#ifdef SUBARCH_EXECVE1 - SUBARCH_EXECVE1(¤t->thread.regs.regs); -#endif - task_unlock(current); - } - return error; -} - long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env) { long err; - err = execve1(file, argv, env); + err = do_execve(file, argv, env, ¤t->thread.regs); if (!err) UML_LONGJMP(current->thread.exec_buf, 1); return err; @@ -81,7 +67,7 @@ long sys_execve(const char __user *file, const char __user *const __user *argv, filename = getname(file); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = execve1(filename, argv, env); + error = do_execve(filename, argv, env, ¤t->thread.regs); putname(filename); out: return error; From bf56d57638452a8ee9241c9ca082d59cd2ca4cc8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Sep 2012 23:20:33 -0400 Subject: [PATCH 69/91] um: don't leak floating point state and segment registers on execve() Signed-off-by: Al Viro --- arch/um/kernel/exec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 4cab0c78684b..8c82786da823 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -39,6 +39,7 @@ void flush_thread(void) void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { + get_safe_registers(regs->regs.gp, regs->regs.fp); PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; current->ptrace &= ~PT_DTRACE; From f9a38eace4498a5e9f6d2cdfc879d5444edc3a5f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Sep 2012 13:39:47 -0400 Subject: [PATCH 70/91] um: let signal_delivered() do SIGTRAP on singlestepping into handler ... rather than duplicating that in sigframe setup code (and doing that inconsistently, at that) Signed-off-by: Al Viro --- arch/um/kernel/signal.c | 6 +++++- arch/x86/um/signal.c | 6 ------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 7362d58efc29..cc9c2350e417 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, struct k_sigaction *ka, siginfo_t *info) { sigset_t *oldset = sigmask_to_save(); + int singlestep = 0; unsigned long sp; int err; + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + singlestep = 1; + /* Did we come from a system call? */ if (PT_REGS_SYSCALL_NR(regs) >= 0) { /* If so, check system call restarting.. */ @@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, if (err) force_sigsegv(signr, current); else - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, info, ka, regs, singlestep); } static int kern_do_signal(struct pt_regs *regs) diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index a508cea13503..ba7363ecf896 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) 0; PT_REGS_CX(regs) = (unsigned long) 0; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } @@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) &frame->info; PT_REGS_CX(regs) = (unsigned long) &frame->uc; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } From d2ce4e92fa4f79a5fdb4cc912b411280afe21697 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Sep 2012 09:28:25 -0400 Subject: [PATCH 71/91] um: kill thread->forking we only use that to tell copy_thread() done by syscall from that done by kernel_thread(). However, it's easier to do simply by checking PF_KTHREAD in thread flags. Merge sys_clone() guts for 32bit and 64bit, while we are at it... Signed-off-by: Al Viro --- arch/um/include/asm/processor-generic.h | 9 --------- arch/um/kernel/process.c | 8 ++++---- arch/um/kernel/syscall.c | 24 +++++++++++----------- arch/x86/um/shared/sysdep/syscalls.h | 2 ++ arch/x86/um/sys_call_table_32.c | 2 +- arch/x86/um/syscalls_32.c | 27 +++++++------------------ arch/x86/um/syscalls_64.c | 23 +++------------------ 7 files changed, 29 insertions(+), 66 deletions(-) diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 69f1c57a8d0d..33a6a2423bd2 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,14 +20,6 @@ struct mm_struct; struct thread_struct { struct task_struct *saved_task; - /* - * This flag is set to 1 before calling do_fork (and analyzed in - * copy_thread) to mark that we are begin called from userspace (fork / - * vfork / clone), and reset to 0 after. It is left to 0 when called - * from kernelspace (i.e. kernel_thread() or fork_idle(), - * as of 2.6.11). - */ - int forking; struct pt_regs regs; int singlestep_syscall; void *fault_addr; @@ -58,7 +50,6 @@ struct thread_struct { #define INIT_THREAD \ { \ - .forking = 0, \ .regs = EMPTY_REGS, \ .fault_addr = NULL, \ .prev_sched = NULL, \ diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 57fc7028714a..c5f5afa50745 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct pt_regs *regs) { void (*handler)(void); + int kthread = current->flags & PF_KTHREAD; int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; - if (current->thread.forking) { + if (!kthread) { memcpy(&p->thread.regs.regs, ®s->regs, sizeof(p->thread.regs.regs)); PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); @@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, handler = fork_handler; arch_copy_thread(¤t->thread.arch, &p->thread.arch); - } - else { + } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); p->thread.request.u.thread = current->thread.request.u.thread; handler = new_thread_handler; @@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, new_thread(task_stack_page(p), &p->thread.switch_buf, handler); - if (current->thread.forking) { + if (!kthread) { clear_flushed_tls(p); /* diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index f958cb876ee3..a4c6d8eee74c 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -17,25 +17,25 @@ long sys_fork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), + return do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; } long sys_vfork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + + return do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); } long old_mmap(unsigned long addr, unsigned long len, diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h index bd9a89b67e41..ca255a805ed9 100644 --- a/arch/x86/um/shared/sysdep/syscalls.h +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -1,3 +1,5 @@ +extern long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); #ifdef __i386__ #include "syscalls_32.h" #else diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 68d1dc91b37b..b5408cecac6c 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -28,7 +28,7 @@ #define ptregs_execve sys_execve #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old -#define ptregs_clone sys_clone +#define ptregs_clone i386_clone #define ptregs_vm86 sys_vm86 #define ptregs_sigaltstack sys_sigaltstack #define ptregs_vfork sys_vfork diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c index b853e8600b9d..db444c7218fe 100644 --- a/arch/x86/um/syscalls_32.c +++ b/arch/x86/um/syscalls_32.c @@ -3,37 +3,24 @@ * Licensed under the GPL */ -#include "linux/sched.h" -#include "linux/shm.h" -#include "linux/ipc.h" -#include "linux/syscalls.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include +#include /* * The prototype on i386 is: * - * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls * * and the "newtls" arg. on i386 is read by copy_thread directly from the * register saved on the stack. */ -long sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tid, void *newtls, int __user *child_tid) +long i386_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) { - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; + return sys_clone(clone_flags, newsp, parent_tid, child_tid); } + long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index f3d82bb6e15a..adb08eb5c22a 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -5,12 +5,9 @@ * Licensed under the GPL */ -#include "linux/linkage.h" -#include "linux/personality.h" -#include "linux/utsname.h" -#include "asm/prctl.h" /* XXX This should get the constants from libc */ -#include "asm/uaccess.h" -#include "os.h" +#include +#include /* XXX This should get the constants from libc */ +#include long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) { @@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr) return arch_prctl(current, code, (unsigned long __user *) addr); } -long sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid) -{ - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; -} - void arch_switch_to(struct task_struct *to) { if ((to->thread.arch.fs == 0) || (to->mm == NULL)) From 01bb6501779ed0b6dc6c55be34b49eaa6306fdd8 Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Sun, 23 Sep 2012 22:56:00 +0200 Subject: [PATCH 72/91] USB: ohci-at91: fix null pointer in ohci_hcd_at91_overcurrent_irq Fixes the following NULL pointer dereference: [ 7.740000] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver [ 7.810000] Unable to handle kernel NULL pointer dereference at virtual address 00000028 [ 7.810000] pgd = c3a38000 [ 7.810000] [00000028] *pgd=23a8c831, *pte=00000000, *ppte=00000000 [ 7.810000] Internal error: Oops: 17 [#1] PREEMPT ARM [ 7.810000] Modules linked in: ohci_hcd(+) regmap_i2c snd_pcm usbcore snd_page_alloc at91_cf snd_timer pcmcia_rsrc snd soundcore gpio_keys regmap_spi pcmcia_core usb_common nls_base [ 7.810000] CPU: 0 Not tainted (3.6.0-rc6-mpa+ #264) [ 7.810000] PC is at __gpio_to_irq+0x18/0x40 [ 7.810000] LR is at ohci_hcd_at91_overcurrent_irq+0x24/0xb4 [ohci_hcd] [ 7.810000] pc : [] lr : [] psr: 40000093 [ 7.810000] sp : c3a11c40 ip : c3a11c50 fp : c3a11c4c [ 7.810000] r10: 00000000 r9 : c02dcd6e r8 : fefff400 [ 7.810000] r7 : 00000000 r6 : c02cc928 r5 : 00000030 r4 : c02dd168 [ 7.810000] r3 : c02e7350 r2 : ffffffea r1 : c02cc928 r0 : 00000000 [ 7.810000] Flags: nZcv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user [ 7.810000] Control: c000717f Table: 23a38000 DAC: 00000015 [ 7.810000] Process modprobe (pid: 285, stack limit = 0xc3a10270) [ 7.810000] Stack: (0xc3a11c40 to 0xc3a12000) [ 7.810000] 1c40: c3a11c6c c3a11c50 bf08f694 c01392cc c3a11c84 c2c38b00 c3806900 00000030 [ 7.810000] 1c60: c3a11ca4 c3a11c70 c0051264 bf08f680 c3a11cac c3a11c80 c003e764 c3806900 [ 7.810000] 1c80: c2c38b00 c02cb05c c02cb000 fefff400 c3806930 c3a11cf4 c3a11cbc c3a11ca8 [ 7.810000] 1ca0: c005142c c005123c c3806900 c3805a00 c3a11cd4 c3a11cc0 c0053f24 c00513e4 [ 7.810000] 1cc0: c3a11cf4 00000030 c3a11cec c3a11cd8 c005120c c0053e88 00000000 00000000 [ 7.810000] 1ce0: c3a11d1c c3a11cf0 c00124d0 c00511e0 01400000 00000001 00000012 00000000 [ 7.810000] 1d00: ffffffff c3a11d94 00000030 00000000 c3a11d34 c3a11d20 c005120c c0012438 [ 7.810000] 1d20: c001dac4 00000012 c3a11d4c c3a11d38 c0009b08 c00511e0 c00523fc 60000013 [ 7.810000] 1d40: c3a11d5c c3a11d50 c0008510 c0009ab4 c3a11ddc c3a11d60 c0008eb4 c00084f0 [ 7.810000] 1d60: 00000000 00000030 00000000 00000080 60000013 bf08f670 c3806900 c2c38b00 [ 7.810000] 1d80: 00000030 c3806930 00000000 c3a11ddc c3a11d88 c3a11da8 c0054190 c00523fc [ 7.810000] 1da0: 60000013 ffffffff c3a11dec c3a11db8 00000000 c2c38b00 bf08f670 c3806900 [ 7.810000] 1dc0: 00000000 00000080 c02cc928 00000030 c3a11e0c c3a11de0 c0052764 c00520d8 [ 7.810000] 1de0: c3a11dfc 00000000 00000000 00000002 bf090f61 00000004 c02cc930 c02cc928 [ 7.810000] 1e00: c3a11e4c c3a11e10 bf090978 c005269c bf090f61 c02cc928 bf093000 c02dd170 [ 7.810000] 1e20: c3a11e3c c02cc930 c02cc930 bf0911d0 bf0911d0 bf093000 c3a10000 00000000 [ 7.810000] 1e40: c3a11e5c c3a11e50 c0155b7c bf090808 c3a11e7c c3a11e60 c0154690 c0155b6c [ 7.810000] 1e60: c02cc930 c02cc964 bf0911d0 c3a11ea0 c3a11e9c c3a11e80 c015484c c01545e8 [ 7.810000] 1e80: 00000000 00000000 c01547e4 bf0911d0 c3a11ec4 c3a11ea0 c0152e58 c01547f4 [ 7.810000] 1ea0: c381b88c c384ab10 c2c10540 bf0911d0 00000000 c02d7518 c3a11ed4 c3a11ec8 [ 7.810000] 1ec0: c01544c0 c0152e0c c3a11efc c3a11ed8 c01536cc c01544b0 bf091075 c3a11ee8 [ 7.810000] 1ee0: bf049af0 bf09120c bf0911d0 00000000 c3a11f1c c3a11f00 c0154e9c c0153628 [ 7.810000] 1f00: bf049af0 bf09120c 000ae190 00000000 c3a11f2c c3a11f20 c0155f58 c0154e04 [ 7.810000] 1f20: c3a11f44 c3a11f30 bf093054 c0155f1c 00000000 00006a4f c3a11f7c c3a11f48 [ 7.810000] 1f40: c0008638 bf093010 bf09120c 000ae190 00000000 c00093c4 00006a4f bf09120c [ 7.810000] 1f60: 000ae190 00000000 c00093c4 00000000 c3a11fa4 c3a11f80 c004fdc4 c000859c [ 7.810000] 1f80: c3a11fa4 000ae190 00006a4f 00016eb8 000ad018 00000080 00000000 c3a11fa8 [ 7.810000] 1fa0: c0009260 c004fd58 00006a4f 00016eb8 000ae190 00006a4f 000ae100 00000000 [ 7.810000] 1fc0: 00006a4f 00016eb8 000ad018 00000080 000adba0 000ad208 00000000 000ad3d8 [ 7.810000] 1fe0: beaf7ae8 beaf7ad8 000172b8 b6e4e940 20000010 000ae190 00000000 00000000 [ 7.810000] Backtrace: [ 7.810000] [] (__gpio_to_irq+0x0/0x40) from [] (ohci_hcd_at91_overcurrent_irq+0x24/0xb4 [ohci_hcd]) [ 7.810000] [] (ohci_hcd_at91_overcurrent_irq+0x0/0xb4 [ohci_hcd]) from [] (handle_irq_event_percpu+0x38/0x1a8) [ 7.810000] r6:00000030 r5:c3806900 r4:c2c38b00 [ 7.810000] [] (handle_irq_event_percpu+0x0/0x1a8) from [] (handle_irq_event+0x58/0x7c) [ 7.810000] [] (handle_irq_event+0x0/0x7c) from [] (handle_simple_irq+0xac/0xd8) [ 7.810000] r5:c3805a00 r4:c3806900 [ 7.810000] [] (handle_simple_irq+0x0/0xd8) from [] (generic_handle_irq+0x3c/0x48) [ 7.810000] r4:00000030 [ 7.810000] [] (generic_handle_irq+0x0/0x48) from [] (gpio_irq_handler+0xa8/0xfc) [ 7.810000] r4:00000000 [ 7.810000] [] (gpio_irq_handler+0x0/0xfc) from [] (generic_handle_irq+0x3c/0x48) [ 7.810000] [] (generic_handle_irq+0x0/0x48) from [] (handle_IRQ+0x64/0x88) [ 7.810000] r4:00000012 [ 7.810000] [] (handle_IRQ+0x0/0x88) from [] (at91_aic_handle_irq+0x30/0x38) [ 7.810000] r5:60000013 r4:c00523fc [ 7.810000] [] (at91_aic_handle_irq+0x0/0x38) from [] (__irq_svc+0x34/0x60) [ 7.810000] Exception stack(0xc3a11d60 to 0xc3a11da8) [ 7.810000] 1d60: 00000000 00000030 00000000 00000080 60000013 bf08f670 c3806900 c2c38b00 [ 7.810000] 1d80: 00000030 c3806930 00000000 c3a11ddc c3a11d88 c3a11da8 c0054190 c00523fc [ 7.810000] 1da0: 60000013 ffffffff [ 7.810000] [] (__setup_irq+0x0/0x458) from [] (request_threaded_irq+0xd8/0x134) [ 7.810000] [] (request_threaded_irq+0x0/0x134) from [] (ohci_hcd_at91_drv_probe+0x180/0x41c [ohci_hcd]) [ 7.810000] [] (ohci_hcd_at91_drv_probe+0x0/0x41c [ohci_hcd]) from [] (platform_drv_probe+0x20/0x24) [ 7.810000] [] (platform_drv_probe+0x0/0x24) from [] (driver_probe_device+0xb8/0x20c) [ 7.810000] [] (driver_probe_device+0x0/0x20c) from [] (__driver_attach+0x68/0x88) [ 7.810000] r7:c3a11ea0 r6:bf0911d0 r5:c02cc964 r4:c02cc930 [ 7.810000] [] (__driver_attach+0x0/0x88) from [] (bus_for_each_dev+0x5c/0x9c) [ 7.810000] r6:bf0911d0 r5:c01547e4 r4:00000000 [ 7.810000] [] (bus_for_each_dev+0x0/0x9c) from [] (driver_attach+0x20/0x28) [ 7.810000] r7:c02d7518 r6:00000000 r5:bf0911d0 r4:c2c10540 [ 7.810000] [] (driver_attach+0x0/0x28) from [] (bus_add_driver+0xb4/0x22c) [ 7.810000] [] (bus_add_driver+0x0/0x22c) from [] (driver_register+0xa8/0x144) [ 7.810000] r7:00000000 r6:bf0911d0 r5:bf09120c r4:bf049af0 [ 7.810000] [] (driver_register+0x0/0x144) from [] (platform_driver_register+0x4c/0x60) [ 7.810000] r7:00000000 r6:000ae190 r5:bf09120c r4:bf049af0 [ 7.810000] [] (platform_driver_register+0x0/0x60) from [] (ohci_hcd_mod_init+0x54/0x8c [ohci_hcd]) [ 7.810000] [] (ohci_hcd_mod_init+0x0/0x8c [ohci_hcd]) from [] (do_one_initcall+0xac/0x174) [ 7.810000] r4:00006a4f [ 7.810000] [] (do_one_initcall+0x0/0x174) from [] (sys_init_module+0x7c/0x1a0) [ 7.810000] [] (sys_init_module+0x0/0x1a0) from [] (ret_fast_syscall+0x0/0x2c) [ 7.810000] r7:00000080 r6:000ad018 r5:00016eb8 r4:00006a4f [ 7.810000] Code: e24cb004 e59f3028 e1a02000 e7930180 (e5903028) [ 7.810000] ---[ end trace 85aa37ed128143b5 ]--- [ 7.810000] Kernel panic - not syncing: Fatal exception in interrupt Commit 6fffb77c (USB: ohci-at91: fix PIO handling in relation with number of ports) started setting unused pins to EINVAL. But this exposed a bug in the ohci_hcd_at91_overcurrent_irq function where the gpio was used without being checked to see if it is valid. This patches fixed the issue by adding the gpio valid check. Signed-off-by: Joachim Eastwood Cc: stable # [3.4+] whereever 6fffb77c went Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index aaa8d2bce217..0bf72f943b00 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -467,7 +467,8 @@ static irqreturn_t ohci_hcd_at91_overcurrent_irq(int irq, void *data) /* From the GPIO notifying the over-current situation, find * out the corresponding port */ at91_for_each_port(port) { - if (gpio_to_irq(pdata->overcurrent_pin[port]) == irq) { + if (gpio_is_valid(pdata->overcurrent_pin[port]) && + gpio_to_irq(pdata->overcurrent_pin[port]) == irq) { gpio = pdata->overcurrent_pin[port]; break; } From 0d00dc2611abbe6ad244d50569c2ee82ce42846c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 26 Sep 2012 13:09:53 -0400 Subject: [PATCH 73/91] USB: Fix race condition when removing host controllers This patch (as1607) fixes a race that can occur if a USB host controller is removed while a process is reading the /sys/kernel/debug/usb/devices file. The usb_device_read() routine uses the bus->root_hub pointer to determine whether or not the root hub is registered. The is not a valid test, because the pointer is set before the root hub gets registered and remains set even after the root hub is unregistered and deallocated. As a result, usb_device_read() or usb_device_dump() can access freed memory, causing an oops. The patch changes the test to use the hcd->rh_registered flag, which does get set and cleared at the appropriate times. It also makes sure to hold the usb_bus_list_lock mutex while setting the flag, so that usb_device_read() will become aware of new root hubs as soon as they are registered. Signed-off-by: Alan Stern Reported-by: Don Zickus Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devices.c | 2 +- drivers/usb/core/hcd.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index d95696584762..3440812b4a84 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -624,7 +624,7 @@ static ssize_t usb_device_read(struct file *file, char __user *buf, /* print devices for all busses */ list_for_each_entry(bus, &usb_bus_list, bus_list) { /* recurse through all children of the root hub */ - if (!bus->root_hub) + if (!bus_to_hcd(bus)->rh_registered) continue; usb_lock_device(bus->root_hub); ret = usb_device_dump(&buf, &nbytes, &skip_bytes, ppos, diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index bc84106ac057..75ba2091f9b4 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1011,10 +1011,7 @@ static int register_root_hub(struct usb_hcd *hcd) if (retval) { dev_err (parent_dev, "can't register root hub for %s, %d\n", dev_name(&usb_dev->dev), retval); - } - mutex_unlock(&usb_bus_list_lock); - - if (retval == 0) { + } else { spin_lock_irq (&hcd_root_hub_lock); hcd->rh_registered = 1; spin_unlock_irq (&hcd_root_hub_lock); @@ -1023,6 +1020,7 @@ static int register_root_hub(struct usb_hcd *hcd) if (HCD_DEAD(hcd)) usb_hc_died (hcd); /* This time clean up */ } + mutex_unlock(&usb_bus_list_lock); return retval; } From 7f8436a1269eaaf2d0b1054a325eddf4e14cb80d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 24 Sep 2012 18:29:01 +0000 Subject: [PATCH 74/91] l2tp: fix return value check In case of error, the function genlmsg_put() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a47..6f936358d664 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -80,8 +80,8 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (!hdr) { + ret = -EMSGSIZE; goto err_out; } @@ -250,8 +250,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) || nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || @@ -617,8 +617,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) || From a326e6dd315ee38c280b7ecf8a0d777952e384cd Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 24 Sep 2012 18:29:35 +0000 Subject: [PATCH 75/91] team: fix return value check In case of error, the function genlmsg_put() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 3ffe8a6e3c8b..f8cd61f449a4 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1653,8 +1653,8 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &team_nl_family, 0, TEAM_CMD_NOOP); - if (IS_ERR(hdr)) { - err = PTR_ERR(hdr); + if (!hdr) { + err = -EMSGSIZE; goto err_msg_put; } @@ -1848,8 +1848,8 @@ start_again: hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2068,8 +2068,8 @@ static int team_nl_fill_port_list_get(struct sk_buff *skb, hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags, TEAM_CMD_PORT_LIST_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; From beb5ac20b3f90ffabac1eecd3c00205255df0728 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 26 Sep 2012 19:51:58 +0000 Subject: [PATCH 76/91] netdev: pasemi: fix return value check in pasemi_mac_phy_init() In case of error, the function of_phy_connect() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/pasemi/pasemi_mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index e559dfa06d6a..6fa74d530e44 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1101,9 +1101,9 @@ static int pasemi_mac_phy_init(struct net_device *dev) phydev = of_phy_connect(dev, phy_dn, &pasemi_adjust_link, 0, PHY_INTERFACE_MODE_SGMII); - if (IS_ERR(phydev)) { + if (!phydev) { printk(KERN_ERR "%s: Could not attach to phy\n", dev->name); - return PTR_ERR(phydev); + return -ENODEV; } mac->phydev = phydev; From bbb35efcda41d589cfff5e2b08c5fb457791117c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 27 Sep 2012 20:10:57 +0200 Subject: [PATCH 77/91] um: Fix IPC on um MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c1d7e01d (ipc: use Kconfig options for __ARCH_WANT_[COMPAT_]IPC_PARSE_VERSION) forgot UML and broke IPC on it. Also UML has to select ARCH_WANT_IPC_PARSE_VERSION usin Kconfig. Reported-and-tested-by: Signed-off-by: Richard Weinberger --- arch/x86/um/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 9926e11a772d..aeaff8bef2f1 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -21,6 +21,7 @@ config 64BIT config X86_32 def_bool !64BIT select HAVE_AOUT + select ARCH_WANT_IPC_PARSE_VERSION config X86_64 def_bool 64BIT From 9429ec96c2718c0d1e3317cf60a87a0405223814 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Aug 2012 20:15:05 +0200 Subject: [PATCH 78/91] um: Preinclude include/linux/kern_levels.h The userspace part of UML uses the asm-offsets.h generator mechanism to create definitions for UM_KERN_ that match the in-kernel KERN_ constant definitions. As of commit 04d2c8c83d0e3ac5f78aeede51babb3236200112 ("printk: convert the format for KERN_ to a 2 byte pattern"), KERN_ is no longer expanded to the literal '""', but to '"\001" "LEVEL"', i.e. it contains two parts. However, the combo of DEFINE_STR() in arch/x86/um/shared/sysdep/kernel-offsets.h and sed-y in Kbuild doesn't support string literals consisting of multiple parts. Hence for all UM_KERN_ definitions, only the SOH character is retained in the actual definition, while the remainder ends up in the comment. E.g. in include/generated/asm-offsets.h we get #define UM_KERN_INFO "\001" /* "6" KERN_INFO */ instead of #define UM_KERN_INFO "\001" "6" /* KERN_INFO */ This causes spurious '^A' output in some kernel messages: Calibrating delay loop... 4640.76 BogoMIPS (lpj=23203840) pid_max: default: 32768 minimum: 301 Mount-cache hash table entries: 256 ^AChecking that host ptys support output SIGIO...Yes ^AChecking that host ptys support SIGIO on close...No, enabling workaround ^AUsing 2.6 host AIO NET: Registered protocol family 16 bio: create slab at 0 Switching to clocksource itimer To fix this: - Move the mapping from UM_KERN_ to KERN_ from arch/um/include/shared/common-offsets.h to arch/um/include/shared/user.h, which is preincluded for all userspace parts, - Preinclude include/linux/kern_levels.h for all userspace parts, to obtain the in-kernel KERN_ constant definitions. This doesn't violate the kernel/userspace separation, as include/linux/kern_levels.h is self-contained and doesn't expose any other kernel internals. - Remove the now unused STR() and DEFINE_STR() macros. Signed-off-by: Geert Uytterhoeven Signed-off-by: Richard Weinberger --- arch/um/include/shared/common-offsets.h | 10 ---------- arch/um/include/shared/user.h | 11 +++++++++++ arch/um/scripts/Makefile.rules | 2 +- arch/x86/um/shared/sysdep/kernel-offsets.h | 3 --- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 40db8f71deae..2df313b6a586 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -7,16 +7,6 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); -DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); -DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); -DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); -DEFINE_STR(UM_KERN_ERR, KERN_ERR); -DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); -DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); -DEFINE_STR(UM_KERN_INFO, KERN_INFO); -DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); -DEFINE_STR(UM_KERN_CONT, KERN_CONT); - DEFINE(UM_ELF_CLASS, ELF_CLASS); DEFINE(UM_ELFCLASS32, ELFCLASS32); DEFINE(UM_ELFCLASS64, ELFCLASS64); diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index 4fa82c055aab..cef068563336 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -26,6 +26,17 @@ extern void panic(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +/* Requires preincluding include/linux/kern_levels.h */ +#define UM_KERN_EMERG KERN_EMERG +#define UM_KERN_ALERT KERN_ALERT +#define UM_KERN_CRIT KERN_CRIT +#define UM_KERN_ERR KERN_ERR +#define UM_KERN_WARNING KERN_WARNING +#define UM_KERN_NOTICE KERN_NOTICE +#define UM_KERN_INFO KERN_INFO +#define UM_KERN_DEBUG KERN_DEBUG +#define UM_KERN_CONT KERN_CONT + #ifdef UML_CONFIG_PRINTK extern int printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index d50270d26b42..15889df9b466 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) $(USER_OBJS:.o=.%): \ - c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o) + c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o) # These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of # using it directly. diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 5868526b5eef..46a9df99f3c5 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -7,9 +7,6 @@ #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) -#define STR(x) #x -#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) - #define BLANK() asm volatile("\n->" : : ) #define OFFSET(sym, str, mem) \ From a3cff128c970da8400875c8893d68ac735e783fb Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Mon, 24 Sep 2012 04:42:59 +0000 Subject: [PATCH 79/91] smsc75xx: fix resume after device reset On some systems this device fails to properly resume after suspend, this patch fixes it by running the usbnet_resume handler. I suspect this also fixes this bug: http://code.google.com/p/chromium-os/issues/detail?id=31871 Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index f5ab6e613ec8..376143e8a1aa 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1253,6 +1253,7 @@ static struct usb_driver smsc75xx_driver = { .probe = usbnet_probe, .suspend = usbnet_suspend, .resume = usbnet_resume, + .reset_resume = usbnet_resume, .disconnect = usbnet_disconnect, .disable_hub_initiated_lpm = 1, }; From b629820d18fa65cc598390e4b9712fd5f83ee693 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 25 Sep 2012 10:17:42 +0000 Subject: [PATCH 80/91] net: phy: smsc: Implement PHY config_init for LAN87xx The LAN8710/LAN8720 chips do have broken the "FlexPWR" smart power-saving capability. Enabling it leads to the PHY not being able to detect Link when cold-started without cable connected. Thus, make sure this is disabled. Signed-off-by: Marek Vasut Cc: Christian Hohnstaedt Cc: David S. Miller Cc: Fabio Estevam Cc: Giuseppe Cavallaro Cc: Otavio Salvador Acked-by: Otavio Salvador Signed-off-by: David S. Miller --- drivers/net/phy/smsc.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 6d6192316b30..88e3991464e7 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -56,6 +56,32 @@ static int smsc_phy_config_init(struct phy_device *phydev) return smsc_phy_ack_interrupt (phydev); } +static int lan87xx_config_init(struct phy_device *phydev) +{ + /* + * Make sure the EDPWRDOWN bit is NOT set. Setting this bit on + * LAN8710/LAN8720 PHY causes the PHY to misbehave, likely due + * to a bug on the chip. + * + * When the system is powered on with the network cable being + * disconnected all the way until after ifconfig ethX up is + * issued for the LAN port with this PHY, connecting the cable + * afterwards does not cause LINK change detection, while the + * expected behavior is the Link UP being detected. + */ + int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); + if (rc < 0) + return rc; + + rc &= ~MII_LAN83C185_EDPWRDOWN; + + rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, rc); + if (rc < 0) + return rc; + + return smsc_phy_ack_interrupt(phydev); +} + static int lan911x_config_init(struct phy_device *phydev) { return smsc_phy_ack_interrupt(phydev); @@ -162,7 +188,7 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .config_init = smsc_phy_config_init, + .config_init = lan87xx_config_init, /* IRQ related */ .ack_interrupt = smsc_phy_ack_interrupt, From fd51790949edbbd17633689d4e19fe26d8447764 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Sep 2012 16:35:51 -0400 Subject: [PATCH 81/91] trivial select_parent documentation fix "Search list for X" sounds like you're trying to find X on a list. Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index 16521a9f2038..0364af2311f4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1141,7 +1141,7 @@ rename_retry: EXPORT_SYMBOL(have_submounts); /* - * Search the dentry child list for the specified parent, + * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level * whenever the d_subdirs list is non-empty and continue From 64f605c77408ad3aaff025e849d2e42cad9687e3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 26 Sep 2012 07:22:02 +0000 Subject: [PATCH 82/91] bnx2: Clean up remaining iounmap commit c0357e975afdbbedab5c662d19bef865f02adc17 modified bnx2 to switch from using ioremap/iounmap to pci_iomap/pci_iounmap. They missed a spot in the error path of bnx2_init_one though. This patch just cleans that up. Signed-off-by: Neil Horman CC: Michael Chan CC: "David S. Miller" Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 79cebd8525ce..e48312f2305d 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8564,7 +8564,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: - iounmap(bp->regview); + pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); From 68b3f28c11bf896be32ad2a2e151aaa5f4a7c98e Mon Sep 17 00:00:00 2001 From: Narendra K Date: Tue, 25 Sep 2012 07:53:19 +0000 Subject: [PATCH 83/91] qlcnic: Fix scheduling while atomic bug In the device close path, 'qlcnic_fw_destroy_ctx' and 'qlcnic_poll_rsp' call msleep. But 'qlcnic_fw_destroy_ctx' and 'qlcnic_poll_rsp' are called with 'adapter->tx_clean_lock' spin lock held resulting in scheduling while atomic bug causing the following trace. I observed that the commit 012dc19a45b2b9cc2ebd14aaa401cf782c2abba4 from John Fastabend addresses a similar issue in ixgbevf driver. Adopting the same approach used in the commit, this patch uses mdelay to address the issue. [79884.999115] BUG: scheduling while atomic: ip/30846/0x00000002 [79885.005562] INFO: lockdep is turned off. [79885.009958] Modules linked in: qlcnic fuse nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE bnep bluetooth rfkill ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables iptable_nat nf_nat iptable_mangle ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables dcdbas coretemp kvm_intel kvm iTCO_wdt ixgbe iTCO_vendor_support crc32c_intel ghash_clmulni_intel nfsd microcode sb_edac pcspkr edac_core dca bnx2x shpchp auth_rpcgss nfs_acl lpc_ich mfd_core mdio lockd libcrc32c wmi acpi_pad acpi_power_meter sunrpc uinput sd_mod sr_mod cdrom crc_t10dif ahci libahci libata megaraid_sas usb_storage dm_mirror dm_region_hash dm_log dm_mod [last unloaded: qlcnic] [79885.083608] Pid: 30846, comm: ip Tainted: G W O 3.6.0-rc7+ #1 [79885.090805] Call Trace: [79885.093569] [] __schedule_bug+0x68/0x76 [79885.099699] [] __schedule+0x99e/0xa00 [79885.105634] [] schedule+0x29/0x70 [79885.111186] [] schedule_timeout+0x16f/0x350 [79885.117724] [] ? init_object+0x4a/0x90 [79885.123770] [] ? __internal_add_timer+0x140/0x140 [79885.130873] [] schedule_timeout_uninterruptible+0x1e/0x20 [79885.138773] [] msleep+0x20/0x30 [79885.144159] [] qlcnic_issue_cmd+0xef/0x290 [qlcnic] [79885.151478] [] qlcnic_fw_cmd_destroy_rx_ctx+0x55/0x90 [qlcnic] [79885.159868] [] qlcnic_fw_destroy_ctx+0x2d/0xa0 [qlcnic] [79885.167576] [] __qlcnic_down+0x11d/0x180 [qlcnic] [79885.174708] [] qlcnic_close+0x18/0x20 [qlcnic] [79885.181547] [] __dev_close_many+0x95/0xe0 [79885.187899] [] __dev_close+0x38/0x50 [79885.193761] [] __dev_change_flags+0xa1/0x180 [79885.200419] [] dev_change_flags+0x28/0x70 [79885.206779] [] do_setlink+0x378/0xa00 [79885.212731] [] ? nla_parse+0x31/0xe0 [79885.218612] [] rtnl_newlink+0x37e/0x560 [79885.224768] [] ? selinux_capable+0x39/0x50 [79885.231217] [] ? security_capable+0x18/0x20 [79885.237765] [] rtnetlink_rcv_msg+0x114/0x2f0 [79885.244412] [] ? rtnl_lock+0x17/0x20 [79885.250280] [] ? rtnl_lock+0x17/0x20 [79885.256148] [] ? __rtnl_unlock+0x20/0x20 [79885.262413] [] netlink_rcv_skb+0xa1/0xb0 [79885.268661] [] rtnetlink_rcv+0x25/0x40 [79885.274727] [] netlink_unicast+0x19d/0x220 [79885.281146] [] netlink_sendmsg+0x305/0x3f0 [79885.287595] [] ? sock_update_classid+0x148/0x2e0 [79885.294650] [] sock_sendmsg+0xbc/0xf0 [79885.300600] [] __sys_sendmsg+0x3ac/0x3c0 [79885.306853] [] ? up_read+0x23/0x40 [79885.312510] [] ? do_page_fault+0x2bc/0x570 [79885.318968] [] ? sys_brk+0x44/0x150 [79885.324715] [] ? fget_light+0x24c/0x520 [79885.330875] [] sys_sendmsg+0x49/0x90 [79885.336707] [] system_call_fastpath+0x16/0x1b Signed-off-by: Narendra K Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index b8ead696141e..2a179d087207 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -15,7 +15,7 @@ qlcnic_poll_rsp(struct qlcnic_adapter *adapter) do { /* give atleast 1ms for firmware to respond */ - msleep(1); + mdelay(1); if (++timeout > QLCNIC_OS_CRB_RETRY_COUNT) return QLCNIC_CDRP_RSP_TIMEOUT; @@ -601,7 +601,7 @@ void qlcnic_fw_destroy_ctx(struct qlcnic_adapter *adapter) qlcnic_fw_cmd_destroy_tx_ctx(adapter); /* Allow dma queues to drain after context reset */ - msleep(20); + mdelay(20); } } From bc9259a8bae9e814fc1f775a1b3effa13e6ad330 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 27 Sep 2012 04:11:00 +0000 Subject: [PATCH 84/91] inetpeer: fix token initialization When jiffies wraps around (for example, 5 minutes after the boot, see INITIAL_JIFFIES) and peer has just been created, now - peer->rate_last can be < XRLIM_BURST_FACTOR * timeout, so token is not set to the maximum value, thus some icmp packets can be unexpectedly dropped. Fix this case by initializing last_rate to 60 seconds in the past. Signed-off-by: Nicolas Dichtel Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e1e0a4e8fd34..c7527f6b9ad9 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -510,7 +510,10 @@ relookup: secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; - p->rate_last = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ From 8dce30c89113e314d29d3b8f362aadff8087fccb Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 27 Sep 2012 10:26:01 +0200 Subject: [PATCH 85/91] ALSA: snd-usb: fix next_packet_size calls for pause case Also fix the calls to next_packet_size() for the pause case. This was missed in 245baf983 ("ALSA: snd-usb: fix calls to next_packet_size"). Signed-off-by: Daniel Mack Reviewed-by: Takashi Iwai Reported-and-tested-by: Christian Tefzer Cc: stable@kernel.org [ Taking directly because Takashi is on vacation - Linus ] Signed-off-by: Linus Torvalds --- sound/usb/endpoint.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index d6e2bb49c59c..060dccb9ec75 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -197,7 +197,13 @@ static void prepare_outbound_urb(struct snd_usb_endpoint *ep, /* no data provider, so send silence */ unsigned int offs = 0; for (i = 0; i < ctx->packets; ++i) { - int counts = ctx->packet_size[i]; + int counts; + + if (ctx->packet_size[i]) + counts = ctx->packet_size[i]; + else + counts = snd_usb_endpoint_next_packet_size(ep); + urb->iso_frame_desc[i].offset = offs * ep->stride; urb->iso_frame_desc[i].length = counts * ep->stride; offs += counts; From df555b665367f9de6c04826acc482096f17c243d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 27 Sep 2012 19:04:21 +0000 Subject: [PATCH 86/91] netdev: octeon: fix return value check in octeon_mgmt_init_phy() In case of error, the function of_phy_connect() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/octeon/octeon_mgmt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/octeon/octeon_mgmt.c b/drivers/net/ethernet/octeon/octeon_mgmt.c index c42bbb16cdae..a688a2ddcfd6 100644 --- a/drivers/net/ethernet/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/octeon/octeon_mgmt.c @@ -722,10 +722,8 @@ static int octeon_mgmt_init_phy(struct net_device *netdev) octeon_mgmt_adjust_link, 0, PHY_INTERFACE_MODE_MII); - if (IS_ERR(p->phydev)) { - p->phydev = NULL; + if (!p->phydev) return -1; - } phy_start_aneg(p->phydev); From 0774e392555a128cff7a94929b9ce957927fef49 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 28 Sep 2012 16:14:44 +0200 Subject: [PATCH 87/91] iommu/amd: Fix wrong assumption in iommu-group specific code The new IOMMU groups code in the AMD IOMMU driver makes the assumption that there is a pci_dev struct available for all device-ids listed in the IVRS ACPI table. Unfortunatly this assumption is not true and so this code causes a NULL pointer dereference at boot on some systems. Fix it by making sure the given pointer is never NULL when passed to the group specific code. The real fix is larger and will be queued for v3.7. Reported-by: Florian Dazinger Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b64502dfa9f4..e89daf1b21b4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -266,7 +266,7 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) static int iommu_init_device(struct device *dev) { - struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev); + struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; struct iommu_group *group; u16 alias; @@ -293,7 +293,9 @@ static int iommu_init_device(struct device *dev) dev_data->alias_data = alias_data; dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - } else + } + + if (dma_pdev == NULL) dma_pdev = pci_dev_get(pdev); /* Account for quirked devices */ From 99a1300e1d84709f419182bb5189760e78234882 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 28 Sep 2012 14:35:31 +0200 Subject: [PATCH 88/91] thp: avoid VM_BUG_ON page_count(page) false positives in __collapse_huge_page_copy Speculative cache pagecache lookups can elevate the refcount from under us, so avoid the false positive. If the refcount is < 2 we'll be notified by a VM_BUG_ON in put_page_testzero as there are two put_page(src_page) in a row before returning from this function. Signed-off-by: Andrea Arcangeli Reviewed-by: Rik van Riel Reviewed-by: Johannes Weiner Cc: Hugh Dickins Cc: Mel Gorman Cc: Petr Holasek Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 57c4b9309015..141dbb695097 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1811,7 +1811,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, src_page = pte_page(pteval); copy_user_highpage(page, src_page, address, vma); VM_BUG_ON(page_mapcount(src_page) != 1); - VM_BUG_ON(page_count(src_page) != 2); release_pte_page(src_page); /* * ptl mostly unnecessary, but preempt has to From 9c603e53d380459fb62fec7cd085acb0b74ac18f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 8 Sep 2012 12:57:30 -0700 Subject: [PATCH 89/91] mtdchar: fix offset overflow detection Sasha Levin has been running trinity in a KVM tools guest, and was able to trigger the BUG_ON() at arch/x86/mm/pat.c:279 (verifying the range of the memory type). The call trace showed that it was mtdchar_mmap() that created an invalid remap_pfn_range(). The problem is that mtdchar_mmap() does various really odd and subtle things with the vma page offset etc, and uses the wrong types (and the wrong overflow) detection for it. For example, the page offset may well be 32-bit on a 32-bit architecture, but after shifting it up by PAGE_SHIFT, we need to use a potentially 64-bit resource_size_t to correctly hold the full value. Also, we need to check that the vma length plus offset doesn't overflow before we check that it is smaller than the length of the mtdmap region. This fixes things up and tries to make the code a bit easier to read. Reported-and-tested-by: Sasha Levin Acked-by: Suresh Siddha Acked-by: Artem Bityutskiy Cc: David Woodhouse Cc: linux-mtd@lists.infradead.org Signed-off-by: Linus Torvalds --- drivers/mtd/mtdchar.c | 48 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index f2f482bec573..a6e74514e662 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1123,6 +1123,33 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file, } #endif +static inline unsigned long get_vm_size(struct vm_area_struct *vma) +{ + return vma->vm_end - vma->vm_start; +} + +static inline resource_size_t get_vm_offset(struct vm_area_struct *vma) +{ + return (resource_size_t) vma->vm_pgoff << PAGE_SHIFT; +} + +/* + * Set a new vm offset. + * + * Verify that the incoming offset really works as a page offset, + * and that the offset and size fit in a resource_size_t. + */ +static inline int set_vm_offset(struct vm_area_struct *vma, resource_size_t off) +{ + pgoff_t pgoff = off >> PAGE_SHIFT; + if (off != (resource_size_t) pgoff << PAGE_SHIFT) + return -EINVAL; + if (off + get_vm_size(vma) - 1 < off) + return -EINVAL; + vma->vm_pgoff = pgoff; + return 0; +} + /* * set up a mapping for shared memory segments */ @@ -1132,20 +1159,29 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma) struct mtd_file_info *mfi = file->private_data; struct mtd_info *mtd = mfi->mtd; struct map_info *map = mtd->priv; - unsigned long start; - unsigned long off; - u32 len; + resource_size_t start, off; + unsigned long len, vma_len; if (mtd->type == MTD_RAM || mtd->type == MTD_ROM) { - off = vma->vm_pgoff << PAGE_SHIFT; + off = get_vm_offset(vma); start = map->phys; len = PAGE_ALIGN((start & ~PAGE_MASK) + map->size); start &= PAGE_MASK; - if ((vma->vm_end - vma->vm_start + off) > len) + vma_len = get_vm_size(vma); + + /* Overflow in off+len? */ + if (vma_len + off < off) + return -EINVAL; + /* Does it fit in the mapping? */ + if (vma_len + off > len) return -EINVAL; off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; + /* Did that overflow? */ + if (off < start) + return -EINVAL; + if (set_vm_offset(vma, off) < 0) + return -EINVAL; vma->vm_flags |= VM_IO | VM_RESERVED; #ifdef pgprot_noncached From 8110e16d42d587997bcaee0c864179e6d93603fe Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 17 Sep 2012 22:23:30 +0200 Subject: [PATCH 90/91] vfs: dcache: fix deadlock in tree traversal IBM reported a deadlock in select_parent(). This was found to be caused by taking rename_lock when already locked when restarting the tree traversal. There are two cases when the traversal needs to be restarted: 1) concurrent d_move(); this can only happen when not already locked, since taking rename_lock protects against concurrent d_move(). 2) racing with final d_put() on child just at the moment of ascending to parent; rename_lock doesn't protect against this rare race, so it can happen when already locked. Because of case 2, we need to be able to handle restarting the traversal when rename_lock is already held. This patch fixes all three callers of try_to_ascend(). IBM reported that the deadlock is gone with this patch. [ I rewrote the patch to be smaller and just do the "goto again" if the lock was already held, but credit goes to Miklos for the real work. - Linus ] Signed-off-by: Miklos Szeredi Cc: Al Viro Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/dcache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index 0364af2311f4..693f95bf1cae 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1134,6 +1134,8 @@ positive: return 1; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -1236,6 +1238,8 @@ out: rename_retry: if (found) return found; + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -3035,6 +3039,8 @@ resume: return; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; From a0d271cbfed1dd50278c6b06bead3d00ba0a88f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 30 Sep 2012 16:47:46 -0700 Subject: [PATCH 91/91] Linux 3.6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a3c11d589681..bb9fff26f078 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Terrified Chipmunk # *DOCUMENTATION*