From 5211613978cb7353a3237e4372958c0e7514683f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Thu, 22 Oct 2015 13:32:08 -0700
Subject: [PATCH 1/9] kmod: don't run async usermode helper as a child of
 kworker thread

call_usermodehelper_exec_sync() does fork() + wait() with "unignored"
SIGCHLD. What we have missed is that this worker thread can have other
children previously forked by call_usermodehelper_exec_work() without
UMH_WAIT_PROC. If such a child exits in between, it becomes a zombie
because auto-reaping only works if SIGCHLD is ignored, and nobody can
reap it (unless/until this worker thread exits too).

Change the !UMH_WAIT_PROC case to use CLONE_PARENT.

Note: this is only the first step. All PF_KTHREAD tasks, even those
created by kernel_thread(), should have ->parent == kthreadd by default.

Fixes: bb304a5c6fc63d8506c ("kmod: handle UMH_WAIT_PROC from system unbound workqueue")
Signed-off-by: Oleg Nesterov
Acked-by: Frederic Weisbecker
Cc: Rik van Riel
Cc: Christoph Lameter
Cc: Tejun Heo
Cc: Rusty Russell
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/kmod.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/kmod.c b/kernel/kmod.c
index da98d0593de2..0277d1216f80 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -327,9 +327,13 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
 		call_usermodehelper_exec_sync(sub_info);
 	} else {
 		pid_t pid;
-
+		/*
+		 * Use CLONE_PARENT to reparent it to kthreadd; we do not
+		 * want to pollute current->children, and we need a parent
+		 * that always ignores SIGCHLD to ensure auto-reaping.
+		 */
 		pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
-				    SIGCHLD);
+				    CLONE_PARENT | SIGCHLD);
 		if (pid < 0) {
 			sub_info->retval = pid;
 			umh_complete(sub_info);
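A userspace sketch of the auto-reaping rule the patch above relies on
(illustrative only; in the kernel it is the reparenting to kthreadd that
provides the SIGCHLD-ignoring parent):

	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		char cmd[64];

		/* With SIGCHLD set to SIG_IGN, exited children are reaped
		 * automatically; comment this out and the child below stays
		 * a zombie ("Z" state) until this process exits. */
		signal(SIGCHLD, SIG_IGN);

		if (fork() == 0)
			_exit(0);		/* child exits immediately */

		sleep(1);			/* let the child exit */
		snprintf(cmd, sizeof(cmd), "ps -o pid,stat --ppid %d", getpid());
		system(cmd);			/* nothing listed when auto-reaped */
		return 0;
	}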
From 67a2e213e7e937c41c52ab5bc46bf3f4de469f6e Mon Sep 17 00:00:00 2001
From: Rohit Vaswani
Date: Thu, 22 Oct 2015 13:32:11 -0700
Subject: [PATCH 2/9] mm: cma: fix incorrect type conversion for size during
 dma allocation

This was found during a userspace fuzzing test, when a large-size DMA
CMA allocation was made by a driver (like ion) through userspace. The
allocation path took the page count as a plain int, so a large enough
count was truncated in the conversion; KASAN caught the resulting
out-of-bounds memset():

  show_stack+0x10/0x1c
  dump_stack+0x74/0xc8
  kasan_report_error+0x2b0/0x408
  kasan_report+0x34/0x40
  __asan_storeN+0x15c/0x168
  memset+0x20/0x44
  __dma_alloc_coherent+0x114/0x18c

Use size_t for the page count throughout.

Signed-off-by: Rohit Vaswani
Acked-by: Greg Kroah-Hartman
Cc: Marek Szyprowski
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/base/dma-contiguous.c  | 2 +-
 include/linux/cma.h            | 2 +-
 include/linux/dma-contiguous.h | 4 ++--
 mm/cma.c                       | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 950fff9ce453..a12ff9863d7e 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -187,7 +187,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
  * global one. Requires architecture specific dev_get_cma_area() helper
  * function.
  */
-struct page *dma_alloc_from_contiguous(struct device *dev, int count,
+struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
 				       unsigned int align)
 {
 	if (align > CONFIG_CMA_ALIGNMENT)
diff --git a/include/linux/cma.h b/include/linux/cma.h
index f7ef093ec49a..29f9e774ab76 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -26,6 +26,6 @@ extern int __init cma_declare_contiguous(phys_addr_t base,
 extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					unsigned int order_per_bit,
 					struct cma **res_cma);
-extern struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align);
+extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
 #endif
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 569bbd039896..fec734df1524 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -111,7 +111,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 	return ret;
 }
 
-struct page *dma_alloc_from_contiguous(struct device *dev, int count,
+struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
 				       unsigned int order);
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 				 int count);
@@ -144,7 +144,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 }
 
 static inline
-struct page *dma_alloc_from_contiguous(struct device *dev, int count,
+struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
 				       unsigned int order)
 {
 	return NULL;
diff --git a/mm/cma.c b/mm/cma.c
index e7d1db533025..4eb56badf37e 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -361,7 +361,7 @@ err:
  * This function allocates part of contiguous memory on specific
  * contiguous memory area.
  */
-struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
+struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
 {
 	unsigned long mask, offset, pfn, start = 0;
 	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
@@ -371,7 +371,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
 	if (!cma || !cma->count)
 		return NULL;
 
-	pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+	pr_debug("%s(cma %p, count %zu, align %d)\n", __func__, (void *)cma,
 		 count, align);
 
 	if (!count)
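A standalone sketch of the int truncation the size_t change above guards
against (hypothetical numbers; 64-bit userland and PAGE_SHIFT == 12
assumed, not the fuzzer's actual input):

	#include <stddef.h>
	#include <stdio.h>

	/* stand-in for the old prototype, which took the page count as int */
	static long alloc_npages(int count)
	{
		return count;
	}

	int main(void)
	{
		size_t size = 1ULL << 43;	/* bogus 8TB request */
		size_t count = size >> 12;	/* 2^31 pages */

		/* the size_t -> int conversion wraps on common targets:
		 * prints "as size_t 2147483648, through int -2147483648" */
		printf("as size_t %zu, through int %ld\n",
		       count, alloc_npages(count));
		return 0;
	}

A count mangled this way makes the allocator reserve something other than
what the caller asked for, while the caller goes on to write the full
original size -- one way to end up at the out-of-bounds store KASAN
flagged.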
From 41192a2d6a7f4cd6af9fc2f8edbbf24b2694f2f6 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky
Date: Thu, 22 Oct 2015 13:32:13 -0700
Subject: [PATCH 3/9] MAINTAINERS: add Sergey as zsmalloc reviewer

Nominate myself as a zsmalloc reviewer.

Signed-off-by: Sergey Senozhatsky
Cc: Minchan Kim
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index fb7d2e4af200..d1116d9ea6ae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11674,6 +11674,7 @@ F:	drivers/tty/serial/zs.*
 ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR
 M:	Minchan Kim
 M:	Nitin Gupta
+R:	Sergey Senozhatsky
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/zsmalloc.c

From b8fa0efa01109e294e9be610465c324f771cb5ba Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas
Date: Thu, 22 Oct 2015 13:32:16 -0700
Subject: [PATCH 4/9] mailmap: update Javier Martinez Canillas' email

The get_maintainer script still reports my old Collabora email based on
old commits, but that address no longer exists, so update mailmap to
report my current email and avoid people sending mail to the old
address.

Signed-off-by: Javier Martinez Canillas
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 4b31af54ccd5..b1e9a97653dc 100644
--- a/.mailmap
+++ b/.mailmap
@@ -59,6 +59,7 @@ James Bottomley
 James Bottomley
 James E Wilson
 James Ketrenos
+Javier Martinez Canillas <javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
 Jean Tourrilhes
 Jeff Garzik
 Jens Axboe

From 47aee4d8e314384807e98b67ade07f6da476aa75 Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Thu, 22 Oct 2015 13:32:19 -0700
Subject: [PATCH 5/9] thp: use is_zero_pfn() only after pte_present() check

Use is_zero_pfn() on pteval only after the pte_present() check on pteval
(it might be a better idea to introduce is_zero_pte(), which checks
pte_present() first).

Otherwise, if the pte is a swap or migration entry whose pte_pfn() result
happens to equal zero_pfn, we lose the user's data in
__collapse_huge_page_copy(). So if you're unlucky, the application
segfaults and you could finally see the message below on exit:

  BUG: Bad rss-counter state mm:ffff88007f099300 idx:2 val:3

Fixes: ca0984caa823 ("mm: incorporate zero pages into transparent huge pages")
Signed-off-by: Minchan Kim
Reviewed-by: Andrea Arcangeli
Acked-by: Kirill A. Shutemov
Cc: Mel Gorman
Acked-by: Vlastimil Babka
Cc: Hugh Dickins
Cc: Rik van Riel
Cc: <stable@vger.kernel.org> [4.1+]
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/huge_memory.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4b06b8db9df2..bbac913f96bc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2206,7 +2206,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 	for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
 	     _pte++, address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
-		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+		if (pte_none(pteval) || (pte_present(pteval) &&
+				is_zero_pfn(pte_pfn(pteval)))) {
 			if (!userfaultfd_armed(vma) &&
 			    ++none_or_zero <= khugepaged_max_ptes_none)
 				continue;
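The is_zero_pte() helper floated in patch 5's message would bundle the
ordering into one place so callers cannot get it wrong -- a kernel-style
sketch only, not something this series adds:

	/*
	 * Hypothetical helper: a swap or migration entry is not present,
	 * so its bits must not be interpreted as a pfn.
	 */
	static inline bool is_zero_pte(pte_t pte)
	{
		return pte_present(pte) && is_zero_pfn(pte_pfn(pte));
	}

With it, the condition in __collapse_huge_page_isolate() would read
"if (pte_none(pteval) || is_zero_pte(pteval))".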
From 296291cdd1629c308114504b850dc343eabc2782 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Thu, 22 Oct 2015 13:32:21 -0700
Subject: [PATCH 6/9] mm: make sendfile(2) killable

Currently the simple program below issues a sendfile(2) system call
which takes about 62 days to complete in my test KVM instance.

	int fd;
	off_t off = 0;

	fd = open("file", O_RDWR | O_TRUNC | O_SYNC | O_CREAT, 0644);
	ftruncate(fd, 2);
	lseek(fd, 0, SEEK_END);
	sendfile(fd, fd, &off, 0xfffffff);

Now you should not ask the kernel to do stupid stuff like copying 256MB
in 2-byte chunks and calling fsync(2) after each chunk, but if you do,
the sysadmin should have a way to stop you.

We actually do have a check for fatal_signal_pending() in
generic_perform_write() which triggers in this path. However, because we
always succeed in writing something before the check is done, we return a
value > 0 from generic_perform_write() and thus the information about the
signal gets lost.

Fix the problem by doing the signal check before writing anything. That
way generic_perform_write() returns -EINTR, the error gets propagated up,
and the sendfile loop terminates early.

Signed-off-by: Jan Kara
Reported-by: Dmitry Vyukov
Cc: Al Viro
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/filemap.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 1cc5467cf36c..327910c2400c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2488,6 +2488,11 @@ again:
 			break;
 		}
 
+		if (fatal_signal_pending(current)) {
+			status = -EINTR;
+			break;
+		}
+
 		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
 						&page, &fsdata);
 		if (unlikely(status < 0))
@@ -2525,10 +2530,6 @@ again:
 		written += copied;
 
 		balance_dirty_pages_ratelimited(mapping);
-		if (fatal_signal_pending(current)) {
-			status = -EINTR;
-			break;
-		}
 	} while (iov_iter_count(i));
 
 	return written ? written : status;

From 3f181b4d8652f7bcd7e9932c7307b8ecd4d87cf6 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin
Date: Thu, 22 Oct 2015 13:32:24 -0700
Subject: [PATCH 7/9] lib/Kconfig.debug: disable -Wframe-larger-than warnings
 with KASAN=y

When the kernel is compiled with KASAN=y, GCC adds redzones for each
variable on the stack. This enlarges functions' stack frames and causes:

	'warning: the frame size of X bytes is larger than Y bytes'

The worst case I've seen so far is the following:

	../net/wireless/nl80211.c: In function `nl80211_send_wiphy':
	../net/wireless/nl80211.c:1731:1: warning: the frame size of 5448 bytes is larger than 2048 bytes [-Wframe-larger-than=]

That kind of warning becomes useless with KASAN=y. It doesn't necessarily
indicate that there is some problem in the code, thus we should turn it
off. (The KASAN=y stack size is increased from 16k to 32k for this
reason.)

Signed-off-by: Andrey Ryabinin
Reported-by: Fengguang Wu
Acked-by: Abylay Ospan
Cc: Andi Kleen
Cc: Ingo Molnar
Cc: Mauro Carvalho Chehab
Cc: Kozlov Sergey
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/Kconfig.debug | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ab76b99adc85..1d1521c26302 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -197,6 +197,7 @@ config ENABLE_MUST_CHECK
 config FRAME_WARN
 	int "Warn for stack frames larger than (needs gcc 4.4)"
 	range 0 8192
+	default 0 if KASAN
 	default 1024 if !64BIT
 	default 2048 if 64BIT
 	help
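For reference, the warning that patch 7's "default 0" silences is GCC's
-Wframe-larger-than=. A minimal standalone file that trips it at the old
64-bit threshold even without KASAN's redzones (hypothetical example, not
kernel code):

	/* gcc -O2 -Wframe-larger-than=2048 -c frame.c
	 * frame.c: warning: the frame size of X bytes is larger
	 * than 2048 bytes [-Wframe-larger-than=]
	 */
	void fill(char *buf);

	int big_frame(void)
	{
		char buf[4096];	/* 4k of locals blows the 2k budget */

		fill(buf);	/* keeps buf from being optimized away */
		return buf[0];
	}

With KASAN=y every such variable is padded with redzones, so frames grow
past any fixed threshold; a FRAME_WARN of 0 turns the warning off
entirely.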
From bb387002693ed28b2bb0408c5dec65521b71e5f1 Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Thu, 22 Oct 2015 13:32:27 -0700
Subject: [PATCH 8/9] fault-inject: fix inverted interval/probability values
 in printk

In the printk, "interval" displays the probability and vice versa.

Fixes: 6adc4a22f20bb ("fault-inject: add ratelimit option")
Signed-off-by: Florian Westphal
Acked-by: Akinobu Mita
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/fault-inject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index f1cdeb024d17..6a823a53e357 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -44,7 +44,7 @@ static void fail_dump(struct fault_attr *attr)
 		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
 		       "name %pd, interval %lu, probability %lu, "
 		       "space %d, times %d\n", attr->dname,
-		       attr->probability, attr->interval,
+		       attr->interval, attr->probability,
 		       atomic_read(&attr->space),
 		       atomic_read(&attr->times));
 		if (attr->verbose > 1)

From b67de018b37a97548645a879c627d4188518e907 Mon Sep 17 00:00:00 2001
From: Joseph Qi
Date: Thu, 22 Oct 2015 13:32:29 -0700
Subject: [PATCH 9/9] ocfs2/dlm: unlock lockres spinlock before dlm_lockres_put

dlm_lockres_put() calls dlm_lockres_release() when it drops the last
reference, which may in turn call dlm_print_one_lock_resource() and take
the lockres spinlock. So unlock the lockres spinlock before
dlm_lockres_put() to avoid a deadlock.

Signed-off-by: Joseph Qi
Cc: Mark Fasheh
Cc: Joel Becker
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/ocfs2/dlm/dlmmaster.c   | 3 ++-
 fs/ocfs2/dlm/dlmrecovery.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ee5aa4daaea0..ce38b4ccc9ab 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1658,12 +1658,13 @@ send_response:
 		if (ret < 0) {
 			mlog(ML_ERROR, "failed to dispatch assert master work\n");
 			response = DLM_MASTER_RESP_ERROR;
+			spin_unlock(&res->spinlock);
 			dlm_lockres_put(res);
 		} else {
 			dispatched = 1;
 			__dlm_lockres_grab_inflight_worker(dlm, res);
+			spin_unlock(&res->spinlock);
 		}
-		spin_unlock(&res->spinlock);
 	} else {
 		if (res)
 			dlm_lockres_put(res);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 3d90ad7ff91f..58eaa5c0d387 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1723,8 +1723,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
 		} else {
 			dispatched = 1;
 			__dlm_lockres_grab_inflight_worker(dlm, res);
+			spin_unlock(&res->spinlock);
 		}
-		spin_unlock(&res->spinlock);
 	} else {
 		/* put.. incase we are not the master */
 		spin_unlock(&res->spinlock);
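The deadlock shape patch 9 removes, reduced to its essentials (a sketch;
in the real code the recursion into the lock only happens on the final
reference, via dlm_lockres_release() -> dlm_print_one_lock_resource()):

	/* broken ordering: AA self-deadlock on the final put */
	spin_lock(&res->spinlock);
	/* ... */
	dlm_lockres_put(res);		/* last ref -> release path may take
					 * res->spinlock again: deadlock */
	spin_unlock(&res->spinlock);	/* never reached */

	/* fixed ordering: drop the lock, then drop the reference */
	spin_lock(&res->spinlock);
	/* ... */
	spin_unlock(&res->spinlock);
	dlm_lockres_put(res);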