From 9f267a17bfb4eb52a1705d0886a7220e6470bf52 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jun 2020 18:41:22 -0700 Subject: [PATCH 01/16] khugepaged: selftests: fix timeout condition in wait_for_scan() The loop exits with "timeout" set to -1 and not to 0 so the test needs to be fixed. Fixes: e7b592f6caca ("khugepaged: add self test") Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Cc: Shuah Khan Cc: Stephen Rothwell Cc: Zi Yan Cc: William Kucharski Cc: Yang Shi Cc: John Hubbard Link: http://lkml.kernel.org/r/20200605110736.GH978434@mwanda Acked-by: Kirill A. Shutemov Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/khugepaged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 51b89cedd09d..8b75821302a7 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -502,7 +502,7 @@ static bool wait_for_scan(const char *msg, char *p) madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - return !timeout; + return timeout == -1; } static void alloc_at_fault(void) From 405906600a33a37e225817c085b1807ad46784b4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 10 Jun 2020 18:41:25 -0700 Subject: [PATCH 02/16] scripts/spelling: add a few more typos This commit adds typos I found from another work. 
Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Cc: Joe Perches Link: http://lkml.kernel.org/r/20200605092502.18018-3-sjpark@amazon.com Signed-off-by: Linus Torvalds --- scripts/spelling.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index d9cd24cf0d40..c45e9afaab2d 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -59,6 +59,7 @@ actualy||actually acumulating||accumulating acumulative||accumulative acumulator||accumulator +acutally||actually adapater||adapter addional||additional additionaly||additionally @@ -249,6 +250,7 @@ calescing||coalescing calle||called callibration||calibration callled||called +callser||caller calucate||calculate calulate||calculate cancelation||cancellation @@ -671,6 +673,7 @@ hanlde||handle hanled||handled happend||happened harware||hardware +havind||having heirarchically||hierarchically helpfull||helpful hexdecimal||hexadecimal @@ -845,6 +848,7 @@ logile||logfile loobpack||loopback loosing||losing losted||lost +maangement||management machinary||machinery maibox||mailbox maintainance||maintenance @@ -905,6 +909,7 @@ modfiy||modify modulues||modules momery||memory memomry||memory +monitring||monitoring monochorome||monochrome monochromo||monochrome monocrome||monochrome @@ -1010,6 +1015,7 @@ partiton||partition pased||passed passin||passing pathes||paths +pattrns||patterns pecularities||peculiarities peformance||performance peforming||performing @@ -1256,6 +1262,7 @@ shoule||should shrinked||shrunk siginificantly||significantly signabl||signal +significanly||significantly similary||similarly similiar||similar simlar||similar @@ -1371,6 +1378,7 @@ thead||thread therfore||therefore thier||their threds||threads +threee||three threshhold||threshold thresold||threshold throught||through @@ -1410,6 +1418,7 @@ tyep||type udpate||update uesd||used uknown||unknown +usccess||success usupported||unsupported uncommited||uncommitted 
unconditionaly||unconditionally From 3021e69219e2f3df6d01243000db32d1325cdd0d Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 10 Jun 2020 18:41:28 -0700 Subject: [PATCH 03/16] kcov: check kcov_softirq in kcov_remote_stop() kcov_remote_stop() should check that the corresponding kcov_remote_start() actually found the specified remote handle and started collecting coverage. This is done by checking the per thread kcov_softirq flag. A particular failure scenario where this was observed involved a softirq with a remote coverage collection section coming between check_kcov_mode() and the access to t->kcov_area in __sanitizer_cov_trace_pc(). In that softirq kcov_remote_start() bailed out after kcov_remote_find() check, but the matching kcov_remote_stop() didn't check if kcov_remote_start() succeeded, and overwrote per thread kcov parameters with invalid (zero) values. Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts") Signed-off-by: Andrey Konovalov Signed-off-by: Andrew Morton Reviewed-by: Dmitry Vyukov Cc: Alexander Potapenko Cc: Marco Elver Cc: Tetsuo Handa Link: http://lkml.kernel.org/r/fcd1cd16eac1d2c01a66befd8ea4afc6f8d09833.1591576806.git.andreyknvl@google.com Signed-off-by: Linus Torvalds --- kernel/kcov.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 55c5d883a93e..6afae0bcbac4 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -427,7 +427,8 @@ void kcov_task_exit(struct task_struct *t) * WARN_ON(!kcov->remote && kcov->t != t); * * For KCOV_REMOTE_ENABLE devices, the exiting task is either: - * 2. A remote task between kcov_remote_start() and kcov_remote_stop(). + * + * 1. A remote task between kcov_remote_start() and kcov_remote_stop(). * In this case we should print a warning right away, since a task * shouldn't be exiting when it's in a kcov coverage collection * section. 
Here t points to the task that is collecting remote @@ -437,7 +438,7 @@ void kcov_task_exit(struct task_struct *t) * WARN_ON(kcov->remote && kcov->t != t); * * 2. The task that created kcov exiting without calling KCOV_DISABLE, - * and then again we can make sure that t->kcov->t == t: + * and then again we make sure that t->kcov->t == t: * WARN_ON(kcov->remote && kcov->t != t); * * By combining all three checks into one we get: @@ -764,7 +765,7 @@ static const struct file_operations kcov_fops = { * Internally, kcov_remote_start() looks up the kcov device associated with the * provided handle, allocates an area for coverage collection, and saves the * pointers to kcov and area into the current task_struct to allow coverage to - * be collected via __sanitizer_cov_trace_pc() + * be collected via __sanitizer_cov_trace_pc(). * In turns kcov_remote_stop() clears those pointers from task_struct to stop * collecting coverage and copies all collected coverage into the kcov area. */ @@ -972,16 +973,25 @@ void kcov_remote_stop(void) local_irq_restore(flags); return; } - kcov = t->kcov; - area = t->kcov_area; - size = t->kcov_size; - sequence = t->kcov_sequence; - + /* + * When in softirq, check if the corresponding kcov_remote_start() + * actually found the remote handle and started collecting coverage. + */ + if (in_serving_softirq() && !t->kcov_softirq) { + local_irq_restore(flags); + return; + } + /* Make sure that kcov_softirq is only set when in softirq. 
*/ if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) { local_irq_restore(flags); return; } + kcov = t->kcov; + area = t->kcov_area; + size = t->kcov_size; + sequence = t->kcov_sequence; + kcov_stop(t); if (in_serving_softirq()) { t->kcov_softirq = 0; From e8ec04938c446e5f4dc53e0147f0f679995012ee Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 10 Jun 2020 18:41:32 -0700 Subject: [PATCH 04/16] lib/lz4/lz4_decompress.c: document deliberate use of `&' This operation was intentional, but tools such as smatch will warn that it might not have been. Signed-off-by: Andrew Morton Cc: Yann Collet Cc: Vasily Averin Cc: Gao Xiang Link: http://lkml.kernel.org/r/3bf931c6ea0cae3e23f3485801986859851b4f04.camel@perches.com Signed-off-by: Linus Torvalds --- lib/lz4/lz4_decompress.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 0c9d3ad17e0f..5371dab6b481 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -141,6 +141,9 @@ static FORCE_INLINE int LZ4_decompress_generic( * space in the output for those 18 bytes earlier, upon * entering the shortcut (in other words, there is a * combined check for both stages). + * + * The & in the likely() below is intentionally not && so that + * some compilers can produce better parallelized runtime code */ if ((endOnInput ? length != RUN_MASK : length <= 8) /* From 8301c719a2bd131436438e49130ee381d30933f5 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 10 Jun 2020 18:41:35 -0700 Subject: [PATCH 05/16] nilfs2: fix null pointer dereference at nilfs_segctor_do_construct() After commit c3aab9a0bd91 ("mm/filemap.c: don't initiate writeback if mapping has no dirty pages"), the following null pointer dereference has been reported on nilfs2: BUG: kernel NULL pointer dereference, address: 00000000000000a8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI ... 
RIP: 0010:percpu_counter_add_batch+0xa/0x60 ... Call Trace: __test_set_page_writeback+0x2d3/0x330 nilfs_segctor_do_construct+0x10d3/0x2110 [nilfs2] nilfs_segctor_construct+0x168/0x260 [nilfs2] nilfs_segctor_thread+0x127/0x3b0 [nilfs2] kthread+0xf8/0x130 ... This crash turned out to be caused by set_page_writeback() call for segment summary buffers at nilfs_segctor_prepare_write(). set_page_writeback() can call inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK) where inode_to_wb(inode) is NULL if the inode of underlying block device does not have an associated wb. This fixes the issue by calling inode_attach_wb() in advance to ensure to associate the bdev inode with its wb. Fixes: c3aab9a0bd91 ("mm/filemap.c: don't initiate writeback if mapping has no dirty pages") Reported-by: Walton Hoops Reported-by: Tomas Hlavaty Reported-by: ARAI Shun-ichi Reported-by: Hideki EIRAKU Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Tested-by: Ryusuke Konishi Cc: [5.4+] Link: http://lkml.kernel.org/r/20200608.011819.1399059588922299158.konishi.ryusuke@gmail.com Signed-off-by: Linus Torvalds --- fs/nilfs2/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 445eef41bfaf..91b58c897f92 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2780,6 +2780,8 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (!nilfs->ns_writer) return -ENOMEM; + inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); + err = nilfs_segctor_start_thread(nilfs->ns_writer); if (err) { kfree(nilfs->ns_writer); From 2581ac7c2f6b093eea3340a4e2db8a55fc9f8470 Mon Sep 17 00:00:00 2001 From: Tim Froidcoeur Date: Wed, 10 Jun 2020 18:41:38 -0700 Subject: [PATCH 06/16] checkpatch: correct check for kernel parameters doc Adding a new kernel parameter with documentation makes checkpatch complain __setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.rst The list of kernel parameters has moved to a 
separate txt file, but checkpatch has not been updated for this. Make checkpatch.pl look for the documentation for new kernel parameters in kernel-parameters.txt instead of kernel-parameters.rst. Fixes: e52347bd66f6 ("Documentation/admin-guide: split the kernel parameter list to a separate file") Signed-off-by: Tim Froidcoeur Signed-off-by: Andrew Morton Acked-by: Joe Perches Cc: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 197436b20288..e23c912548f5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2407,7 +2407,7 @@ sub process { if ($rawline=~/^\+\+\+\s+(\S+)/) { $setup_docs = 0; - if ($1 =~ m@Documentation/admin-guide/kernel-parameters.rst$@) { + if ($1 =~ m@Documentation/admin-guide/kernel-parameters.txt$@) { $setup_docs = 1; } #next; @@ -6388,7 +6388,7 @@ sub process { if (!grep(/$name/, @setup_docs)) { CHK("UNDOCUMENTED_SETUP", - "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.rst\n" . $herecurr); + "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.txt\n" . $herecurr); } } From 81c4f4d924d5d009b5ed785a3e22b18d0f7b831f Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 10 Jun 2020 18:41:41 -0700 Subject: [PATCH 07/16] lib: fix bitmap_parse() on 64-bit big endian archs Commit 2d6261583be0 ("lib: rework bitmap_parse()") does not take into account order of halfwords on 64-bit big endian architectures. As result (at least) Receive Packet Steering, IRQ affinity masks and runtime kernel test "test_bitmap" get broken on s390. 
[andriy.shevchenko@linux.intel.com: convert infinite while loop to a for loop] Link: http://lkml.kernel.org/r/20200609140535.87160-1-andriy.shevchenko@linux.intel.com Fixes: 2d6261583be0 ("lib: rework bitmap_parse()") Signed-off-by: Alexander Gordeev Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Yury Norov Cc: Amritha Nambiar Cc: Arnaldo Carvalho de Melo Cc: Chris Wilson Cc: Kees Cook Cc: Matthew Wilcox Cc: Miklos Szeredi Cc: Rasmus Villemoes Cc: Steffen Klassert Cc: "Tobin C . Harding" Cc: Vineet Gupta Cc: Will Deacon Cc: Willem de Bruijn Cc: Link: http://lkml.kernel.org/r/1591634471-17647-1-git-send-email-agordeev@linux.ibm.com Signed-off-by: Linus Torvalds --- lib/bitmap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 21a7640c5eed..0364452b1617 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -741,8 +741,9 @@ int bitmap_parse(const char *start, unsigned int buflen, int chunks = BITS_TO_U32(nmaskbits); u32 *bitmap = (u32 *)maskp; int unset_bit; + int chunk; - while (1) { + for (chunk = 0; ; chunk++) { end = bitmap_find_region_reverse(start, end); if (start > end) break; @@ -750,7 +751,11 @@ int bitmap_parse(const char *start, unsigned int buflen, if (!chunks--) return -EOVERFLOW; - end = bitmap_get_x32_reverse(start, end, bitmap++); +#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) + end = bitmap_get_x32_reverse(start, end, &bitmap[chunk ^ 1]); +#else + end = bitmap_get_x32_reverse(start, end, &bitmap[chunk]); +#endif if (IS_ERR(end)) return PTR_ERR(end); } From 787d563b8642f35c561e439ebaf7ee5653437d05 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 10 Jun 2020 18:41:44 -0700 Subject: [PATCH 08/16] mm/debug_vm_pgtable: fix kernel crash by checking for THP support Architectures can have CONFIG_TRANSPARENT_HUGEPAGE enabled but no THP support enabled based on platforms. For ex: with 4K PAGE_SIZE ppc64 supports THP only with radix translation. 
This results in below crash when running with hash translation and 4K PAGE_SIZE. kernel BUG at arch/powerpc/include/asm/book3s/64/hash-4k.h:140! cpu 0x61: Vector: 700 (Program Check) at [c000000ff948f860] pc: debug_vm_pgtable+0x480/0x8b0 lr: debug_vm_pgtable+0x474/0x8b0 ... debug_vm_pgtable+0x374/0x8b0 (unreliable) do_one_initcall+0x98/0x4f0 kernel_init_freeable+0x330/0x3fc kernel_init+0x24/0x148 Check for THP support correctly Link: http://lkml.kernel.org/r/20200608125252.407659-1-aneesh.kumar@linux.ibm.com Fixes: 399145f9eb6c ("mm/debug: add tests validating architecture page table helpers") Signed-off-by: Aneesh Kumar K.V Reviewed-by: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug_vm_pgtable.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 9ec59c38d6a2..e45623016aea 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -60,6 +60,9 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot) { pmd_t pmd = pfn_pmd(pfn, prot); + if (!has_transparent_hugepage()) + return; + WARN_ON(!pmd_same(pmd, pmd)); WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd)))); WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd)))); @@ -79,6 +82,9 @@ static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { pud_t pud = pfn_pud(pfn, prot); + if (!has_transparent_hugepage()) + return; + WARN_ON(!pud_same(pud, pud)); WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud)))); WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud)))); From cc989e78472e29c69b196d6985e718c8834027b7 Mon Sep 17 00:00:00 2001 From: Keyur Patel Date: Wed, 10 Jun 2020 18:41:47 -0700 Subject: [PATCH 09/16] ocfs2: fix spelling mistake and grammar ./ocfs2/mmap.c:65: bebongs ==> belonging Signed-off-by: Keyur Patel Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Link: 
http://lkml.kernel.org/r/20200608014818.102358-1-iamkeyur96@gmail.com Signed-off-by: Linus Torvalds --- fs/ocfs2/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 3a44e461828a..25cabbfe87fc 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -62,7 +62,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file, last_index = (size - 1) >> PAGE_SHIFT; /* - * There are cases that lead to the page no longer bebongs to the + * There are cases that lead to the page no longer belonging to the * mapping. * 1) pagecache truncates locally due to memory pressure. * 2) pagecache truncates when another is taking EX lock against From 496df3d3ab8a407f83330fb8d7160a5f91898c55 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 10 Jun 2020 18:41:50 -0700 Subject: [PATCH 10/16] mm: add comments on pglist_data zones While making other modifications it was easy to confuse the two struct members node_zones and node_zonelists. For those already familiar with the code, this might seem to be a silly patch, but it's quite helpful to disambiguate the similar-sounding fields While here, add a small comment on why nr_zones isn't simply MAX_NR_ZONES Signed-off-by: Ben Widawsky Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/20200520205443.2757414-1-ben.widawsky@intel.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index df1f08486d81..c4c37fd12104 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -660,9 +660,21 @@ struct deferred_split { * per-zone basis. */ typedef struct pglist_data { + /* + * node_zones contains just the zones for THIS node. Not all of the + * zones may be populated, but it is the full list. It is referenced by + * this node's node_zonelists as well as other node's node_zonelists. 
+ */ struct zone node_zones[MAX_NR_ZONES]; + + /* + * node_zonelists contains references to all zones in all nodes. + * Generally the first zones will be references to this node's + * node_zones. + */ struct zonelist node_zonelists[MAX_ZONELISTS]; - int nr_zones; + + int nr_zones; /* number of populated zones in this node */ #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ struct page *node_mem_map; #ifdef CONFIG_PAGE_EXTENSION From 6af132f3a11e6d2d9032ce51c882aa5933543255 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 10 Jun 2020 18:41:53 -0700 Subject: [PATCH 11/16] lib: test get_count_order/long in test_bitops.c Add some tests for get_count_order/long in test_bitops.c. [akpm@linux-foundation.org: define local `i'] [akpm@linux-foundation.org: enhancement, warning fix, cleanup per Geert] [akpm@linux-foundation.org: fix loop bound, per Wei Yang] Signed-off-by: Wei Yang Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Christian Brauner Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20200602223728.32722-1-richard.weiyang@gmail.com Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 10 ++++----- lib/test_bitops.c | 53 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cb98741601bd..5beec9c833ce 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2052,15 +2052,15 @@ config TEST_LKM If unsure, say N. config TEST_BITOPS - tristate "Test module for compilation of clear_bit/set_bit operations" + tristate "Test module for compilation of bitops operations" depends on m help This builds the "test_bitops" module that is much like the TEST_LKM module except that it does a basic exercise of the - clear_bit and set_bit macros to make sure there are no compiler - warnings from C=1 sparse checker or -Wextra compilations. It has - no dependencies and doesn't run or load unless explicitly requested - by name. for example: modprobe test_bitops. 
+ set/clear_bit macros and get_count_order/long to make sure there are + no compiler warnings from C=1 sparse checker or -Wextra + compilations. It has no dependencies and doesn't run or load unless + explicitly requested by name. for example: modprobe test_bitops. If unsure, say N. diff --git a/lib/test_bitops.c b/lib/test_bitops.c index fd50b3ae4a14..ced25e3a779b 100644 --- a/lib/test_bitops.c +++ b/lib/test_bitops.c @@ -9,7 +9,11 @@ #include #include -/* a tiny module only meant to test set/clear_bit */ +/* a tiny module only meant to test + * + * set/clear_bit + * get_count_order/long + */ /* use an enum because thats the most common BITMAP usage */ enum bitops_fun { @@ -24,14 +28,59 @@ enum bitops_fun { static DECLARE_BITMAP(g_bitmap, BITOPS_LENGTH); +static unsigned int order_comb[][2] = { + {0x00000003, 2}, + {0x00000004, 2}, + {0x00001fff, 13}, + {0x00002000, 13}, + {0x50000000, 31}, + {0x80000000, 31}, + {0x80003000, 32}, +}; + +#ifdef CONFIG_64BIT +static unsigned long order_comb_long[][2] = { + {0x0000000300000000, 34}, + {0x0000000400000000, 34}, + {0x00001fff00000000, 45}, + {0x0000200000000000, 45}, + {0x5000000000000000, 63}, + {0x8000000000000000, 63}, + {0x8000300000000000, 64}, +}; +#endif + static int __init test_bitops_startup(void) { + int i; + pr_warn("Loaded test module\n"); set_bit(BITOPS_4, g_bitmap); set_bit(BITOPS_7, g_bitmap); set_bit(BITOPS_11, g_bitmap); set_bit(BITOPS_31, g_bitmap); set_bit(BITOPS_88, g_bitmap); + + for (i = 0; i < ARRAY_SIZE(order_comb); i++) { + if (order_comb[i][1] != get_count_order(order_comb[i][0])) + pr_warn("get_count_order wrong for %x\n", + order_comb[i][0]); + } + + for (i = 0; i < ARRAY_SIZE(order_comb); i++) { + if (order_comb[i][1] != get_count_order_long(order_comb[i][0])) + pr_warn("get_count_order_long wrong for %x\n", + order_comb[i][0]); + } + +#ifdef CONFIG_64BIT + for (i = 0; i < ARRAY_SIZE(order_comb_long); i++) { + if (order_comb_long[i][1] != + get_count_order_long(order_comb_long[i][0])) + 
pr_warn("get_count_order_long wrong for %lx\n", + order_comb_long[i][0]); + } +#endif return 0; } @@ -55,6 +104,6 @@ static void __exit test_bitops_unstartup(void) module_init(test_bitops_startup); module_exit(test_bitops_unstartup); -MODULE_AUTHOR("Jesse Brandeburg "); +MODULE_AUTHOR("Jesse Brandeburg , Wei Yang "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Bit testing module"); From 2c34f6f738d2509cf9b710abd35a605acfd6ada6 Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Wed, 10 Jun 2020 18:41:56 -0700 Subject: [PATCH 12/16] stacktrace: cleanup inconsistent variable type Modify the variable type of 'skip' member of struct stack_trace. In theory, the 'skip' variable type should be unsigned int. There are two reasons: - The 'skip' only has two situation, 1)Positive value, 2)Zero - The 'skip' of struct stack_trace has inconsistent type with struct stack_trace_data, it makes a bit confusion in the relationship between struct stack_trace and stack_trace_data. Signed-off-by: Walter Wu Signed-off-by: Andrew Morton Reviewed-by: Bart Van Assche Cc: Matthias Brugger Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Link: http://lkml.kernel.org/r/20200421013511.5960-1-walter-zh.wu@mediatek.com Signed-off-by: Linus Torvalds --- include/linux/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 83bd8cb475d7..b7af8cc13eda 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -64,7 +64,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; - int skip; /* input argument: How many entries to skip */ + unsigned int skip; /* input argument: How many entries to skip */ }; extern void save_stack_trace(struct stack_trace *trace); From 9bf5b9eb232b34738800868e30bea3bad4a6a1ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jun 2020 
18:41:59 -0700 Subject: [PATCH 13/16] kernel: move use_mm/unuse_mm to kthread.c Patch series "improve use_mm / unuse_mm", v2. This series improves the use_mm / unuse_mm interface by better documenting the assumptions, and by taking the set_fs manipulations spread over the callers into the core API. This patch (of 3): Use the proper API instead. Link: http://lkml.kernel.org/r/20200404094101.672954-1-hch@lst.de These helpers are only for use with kernel threads, and I will tie them more into the kthread infrastructure going forward. Also move the prototypes to kthread.h - mmu_context.h was a little weird to start with as it otherwise contains very low-level MM bits. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Tested-by: Jens Axboe Reviewed-by: Jens Axboe Acked-by: Felix Kuehling Cc: Alex Deucher Cc: Al Viro Cc: Felipe Balbi Cc: Jason Wang Cc: "Michael S. Tsirkin" Cc: Zhenyu Wang Cc: Zhi Wang Cc: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20200404094101.672954-1-hch@lst.de Link: http://lkml.kernel.org/r/20200416053158.586887-1-hch@lst.de Link: http://lkml.kernel.org/r/20200404094101.672954-5-hch@lst.de Signed-off-by: Linus Torvalds --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 1 - .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 - drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- drivers/usb/gadget/function/f_fs.c | 2 +- drivers/usb/gadget/legacy/inode.c | 2 +- drivers/vhost/vhost.c | 1 - fs/aio.c | 1 - fs/io-wq.c | 1 - fs/io_uring.c | 1 - include/linux/kthread.h | 5 ++ include/linux/mmu_context.h | 5 -- kernel/kthread.c | 56 ++++++++++++++++ mm/Makefile | 2 +- mm/mmu_context.c | 64 ------------------- 18 files changed, 66 insertions(+), 85 deletions(-) delete mode 100644 mm/mmu_context.c diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 53b4126373a5..b94bbb8e7bb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 6529caca88fe..35d4a5ab0228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..bf927f432506 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -19,7 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_10_1_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index c6944739183a..744366c7ee85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 2f4bdc80a6b2..feab4cc6e836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gfx_v8_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..c7fd0c47b254 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,8 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_9_0_offset.h" diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index eee530453aa6..ad8a9df49f29 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 494f853f2206..7ae54b7b637b 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 3afddd3bea6e..20fba95ed0a6 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 596132a96cd5..ffc7cc31d7eb 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/aio.c b/fs/aio.c index 7e079137fdcf..7ecddc2f38db 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/io-wq.c b/fs/io-wq.c index 4023c9846860..5f590bf27bff 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -10,7 +10,6 @@ #include 
#include #include -#include #include #include #include diff --git a/fs/io_uring.c b/fs/io_uring.c index 9fb0dc6033ba..9842443dde20 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8bbcaad7ef0f..c2d40c9672d6 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -5,6 +5,8 @@ #include #include +struct mm_struct; + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -198,6 +200,9 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); +void use_mm(struct mm_struct *mm); +void unuse_mm(struct mm_struct *mm); + struct cgroup_subsys_state; #ifdef CONFIG_BLK_CGROUP diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index d9a543a9e1cc..c51a84132d7c 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -4,11 +4,6 @@ #include -struct mm_struct; - -void use_mm(struct mm_struct *mm); -void unuse_mm(struct mm_struct *mm); - /* Architectures that care about IRQ state in switch_mm can override this. */ #ifndef switch_mm_irqs_off # define switch_mm_irqs_off switch_mm diff --git a/kernel/kthread.c b/kernel/kthread.c index bfbfa481be3a..ce4610316377 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1,13 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* Kernel thread helper functions. * Copyright (C) 2004 IBM Corporation, Rusty Russell. + * Copyright (C) 2009 Red Hat, Inc. * * Creation is done via kthreadd, so that we get a clean environment * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.). 
*/ #include +#include +#include #include +#include #include #include #include @@ -25,6 +29,7 @@ #include #include + static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; @@ -1203,6 +1208,57 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); +/* + * use_mm + * Makes the calling kernel thread take on the specified + * mm context. + * (Note: this routine is intended to be called only + * from a kernel thread context) + */ +void use_mm(struct mm_struct *mm) +{ + struct mm_struct *active_mm; + struct task_struct *tsk = current; + + task_lock(tsk); + active_mm = tsk->active_mm; + if (active_mm != mm) { + mmgrab(mm); + tsk->active_mm = mm; + } + tsk->mm = mm; + switch_mm(active_mm, mm, tsk); + task_unlock(tsk); +#ifdef finish_arch_post_lock_switch + finish_arch_post_lock_switch(); +#endif + + if (active_mm != mm) + mmdrop(active_mm); +} +EXPORT_SYMBOL_GPL(use_mm); + +/* + * unuse_mm + * Reverses the effect of use_mm, i.e. 
releases the + * specified mm context which was earlier taken on + * by the calling kernel thread + * (Note: this routine is intended to be called only + * from a kernel thread context) + */ +void unuse_mm(struct mm_struct *mm) +{ + struct task_struct *tsk = current; + + task_lock(tsk); + sync_mm_rss(mm); + tsk->mm = NULL; + /* active_mm is still 'mm' */ + enter_lazy_tlb(mm, tsk); + task_unlock(tsk); +} +EXPORT_SYMBOL_GPL(unuse_mm); + #ifdef CONFIG_BLK_CGROUP /** * kthread_associate_blkcg - associate blkcg to current kthread diff --git a/mm/Makefile b/mm/Makefile index 662fd1504646..cc8f897dfac0 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -41,7 +41,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ - mm_init.o mmu_context.o percpu.o slab_common.o \ + mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ debug.o gup.o $(mmu-y) diff --git a/mm/mmu_context.c b/mm/mmu_context.c deleted file mode 100644 index 3e612ae748e9..000000000000 --- a/mm/mmu_context.c +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (C) 2009 Red Hat, Inc. - * - * See ../COPYING for licensing terms. - */ - -#include -#include -#include -#include -#include -#include - -#include - -/* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. 
- * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void use_mm(struct mm_struct *mm) -{ - struct mm_struct *active_mm; - struct task_struct *tsk = current; - - task_lock(tsk); - active_mm = tsk->active_mm; - if (active_mm != mm) { - mmgrab(mm); - tsk->active_mm = mm; - } - tsk->mm = mm; - switch_mm(active_mm, mm, tsk); - task_unlock(tsk); -#ifdef finish_arch_post_lock_switch - finish_arch_post_lock_switch(); -#endif - - if (active_mm != mm) - mmdrop(active_mm); -} -EXPORT_SYMBOL_GPL(use_mm); - -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void unuse_mm(struct mm_struct *mm) -{ - struct task_struct *tsk = current; - - task_lock(tsk); - sync_mm_rss(mm); - tsk->mm = NULL; - /* active_mm is still 'mm' */ - enter_lazy_tlb(mm, tsk); - task_unlock(tsk); -} -EXPORT_SYMBOL_GPL(unuse_mm); From 4dbe59a6ae358f30b6a9a50406249d54cc954dc1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jun 2020 18:42:03 -0700 Subject: [PATCH 14/16] kernel: move use_mm/unuse_mm to kthread.c cover the newly merged use_mm/unuse_mm caller in vfio Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Acked-by: Felix Kuehling Cc: Alex Deucher Cc: Al Viro Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Jason Wang Cc: Jens Axboe Cc: "Michael S. 
Tsirkin" Cc: Zhenyu Wang Cc: Zhi Wang Link: http://lkml.kernel.org/r/20200416053158.586887-2-hch@lst.de Signed-off-by: Linus Torvalds --- drivers/vfio/vfio_iommu_type1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 186acd8675ff..d5c08a750441 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include From f5678e7f2ac31c270334b936352f0ef2fe7dd2b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jun 2020 18:42:06 -0700 Subject: [PATCH 15/16] kernel: better document the use_mm/unuse_mm API contract Switch the function documentation to kerneldoc comments, and add WARN_ON_ONCE asserts that the calling thread is a kernel thread and does not have ->mm set (or has ->mm set in the case of unuse_mm). Also give the functions a kthread_ prefix to better document the use case. [hch@lst.de: fix a comment typo, cover the newly merged use_mm/unuse_mm caller in vfio] Link: http://lkml.kernel.org/r/20200416053158.586887-3-hch@lst.de [sfr@canb.auug.org.au: powerpc/vas: fix up for {un}use_mm() rename] Link: http://lkml.kernel.org/r/20200422163935.5aa93ba5@canb.auug.org.au Signed-off-by: Christoph Hellwig Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Tested-by: Jens Axboe Reviewed-by: Jens Axboe Acked-by: Felix Kuehling Acked-by: Greg Kroah-Hartman [usb] Acked-by: Haren Myneni Cc: Alex Deucher Cc: Al Viro Cc: Felipe Balbi Cc: Jason Wang Cc: "Michael S. 
Tsirkin" Cc: Zhenyu Wang Cc: Zhi Wang Link: http://lkml.kernel.org/r/20200404094101.672954-6-hch@lst.de Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/powernv/vas-fault.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 +-- drivers/usb/gadget/function/f_fs.c | 4 +-- drivers/usb/gadget/legacy/inode.c | 4 +-- drivers/vfio/vfio_iommu_type1.c | 4 +-- drivers/vhost/vhost.c | 4 +-- fs/io-wq.c | 6 ++-- fs/io_uring.c | 4 +-- include/linux/kthread.h | 4 +-- kernel/kthread.c | 33 +++++++++++----------- mm/oom_kill.c | 6 ++-- mm/vmacache.c | 4 +-- 12 files changed, 40 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 25db70be4c9c..266a6ca5e15e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -127,7 +127,7 @@ static void update_csb(struct vas_window *window, return; } - use_mm(window->mm); + kthread_use_mm(window->mm); rc = copy_to_user(csb_addr, &csb, sizeof(csb)); /* * User space polls on csb.flags (first byte). 
So add barrier @@ -139,7 +139,7 @@ static void update_csb(struct vas_window *window, smp_mb(); rc = copy_to_user(csb_addr, &csb, sizeof(u8)); } - unuse_mm(window->mm); + kthread_unuse_mm(window->mm); put_task_struct(tsk); /* Success */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b94bbb8e7bb4..142746836838 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -197,9 +197,9 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ } else if (current->mm == NULL) { \ - use_mm(mmptr); \ + kthread_use_mm(mmptr); \ valid = !get_user((dst), (wptr)); \ - unuse_mm(mmptr); \ + kthread_unuse_mm(mmptr); \ } \ pagefault_enable(); \ } \ diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7ae54b7b637b..f80b2747d7c5 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -827,9 +827,9 @@ static void ffs_user_copy_worker(struct work_struct *work) mm_segment_t oldfs = get_fs(); set_fs(USER_DS); - use_mm(io_data->mm); + kthread_use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); - unuse_mm(io_data->mm); + kthread_unuse_mm(io_data->mm); set_fs(oldfs); } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 20fba95ed0a6..9ee0bfe7bcda 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -462,9 +462,9 @@ static void ep_user_copy_worker(struct work_struct *work) struct kiocb *iocb = priv->iocb; size_t ret; - use_mm(mm); + kthread_use_mm(mm); ret = copy_to_iter(priv->buf, priv->actual, &priv->to); - unuse_mm(mm); + kthread_unuse_mm(mm); if (!ret) ret = -EFAULT; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d5c08a750441..5e556ac9102a 100644 --- 
a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2817,7 +2817,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, return -EPERM; if (kthread) - use_mm(mm); + kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2844,7 +2844,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; if (kthread) - unuse_mm(mm); + kthread_unuse_mm(mm); out: mmput(mm); return *copied ? 0 : -EFAULT; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ffc7cc31d7eb..1ad3d10c121a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -332,7 +332,7 @@ static int vhost_worker(void *data) mm_segment_t oldfs = get_fs(); set_fs(USER_DS); - use_mm(dev->mm); + kthread_use_mm(dev->mm); for (;;) { /* mb paired w/ kthread_stop */ @@ -360,7 +360,7 @@ static int vhost_worker(void *data) schedule(); } } - unuse_mm(dev->mm); + kthread_unuse_mm(dev->mm); set_fs(oldfs); return 0; } diff --git a/fs/io-wq.c b/fs/io-wq.c index 5f590bf27bff..748621f7391e 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -170,7 +170,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) } __set_current_state(TASK_RUNNING); set_fs(KERNEL_DS); - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } @@ -417,7 +417,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) { if (worker->mm) { - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } @@ -426,7 +426,7 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) return; } if (mmget_not_zero(work->mm)) { - use_mm(work->mm); + kthread_use_mm(work->mm); if (!worker->mm) set_fs(USER_DS); worker->mm = work->mm; diff --git a/fs/io_uring.c b/fs/io_uring.c index 9842443dde20..ec4e9d36210b 100644 --- 
a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5866,7 +5866,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[req->opcode].needs_mm && !current->mm) { if (unlikely(!mmget_not_zero(ctx->sqo_mm))) return -EFAULT; - use_mm(ctx->sqo_mm); + kthread_use_mm(ctx->sqo_mm); } sqe_flags = READ_ONCE(sqe->flags); @@ -5980,7 +5980,7 @@ static inline void io_sq_thread_drop_mm(struct io_ring_ctx *ctx) struct mm_struct *mm = current->mm; if (mm) { - unuse_mm(mm); + kthread_unuse_mm(mm); mmput(mm); } } diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c2d40c9672d6..12258ea077cf 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -200,8 +200,8 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); -void use_mm(struct mm_struct *mm); -void unuse_mm(struct mm_struct *mm); +void kthread_use_mm(struct mm_struct *mm); +void kthread_unuse_mm(struct mm_struct *mm); struct cgroup_subsys_state; diff --git a/kernel/kthread.c b/kernel/kthread.c index ce4610316377..8ed4b4fbec7c 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1208,18 +1208,18 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); -/* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. 
- * (Note: this routine is intended to be called only - * from a kernel thread context) +/** + * kthread_use_mm - make the calling kthread operate on an address space + * @mm: address space to operate on */ -void use_mm(struct mm_struct *mm) +void kthread_use_mm(struct mm_struct *mm) { struct mm_struct *active_mm; struct task_struct *tsk = current; + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(tsk->mm); + task_lock(tsk); active_mm = tsk->active_mm; if (active_mm != mm) { @@ -1236,20 +1236,19 @@ void use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); } -EXPORT_SYMBOL_GPL(use_mm); +EXPORT_SYMBOL_GPL(kthread_use_mm); -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) +/** + * kthread_unuse_mm - reverse the effect of kthread_use_mm() + * @mm: address space to operate on */ -void unuse_mm(struct mm_struct *mm) +void kthread_unuse_mm(struct mm_struct *mm) { struct task_struct *tsk = current; + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(!tsk->mm); + task_lock(tsk); sync_mm_rss(mm); tsk->mm = NULL; @@ -1257,7 +1256,7 @@ void unuse_mm(struct mm_struct *mm) enter_lazy_tlb(mm, tsk); task_unlock(tsk); } -EXPORT_SYMBOL_GPL(unuse_mm); +EXPORT_SYMBOL_GPL(kthread_unuse_mm); #ifdef CONFIG_BLK_CGROUP /** diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b4e9491cb320..6e94962893ee 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -126,7 +126,7 @@ static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc) /* * The process p may have detached its own ->mm while exiting or through - * use_mm(), but one or more of its subthreads may still have a valid + * kthread_use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. 
*/ @@ -919,8 +919,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) continue; } /* - * No use_mm() user needs to read from the userspace so we are - * ok to reap it. + * No kthread_use_mm() user needs to read from the userspace so + * we are ok to reap it. */ if (unlikely(p->flags & PF_KTHREAD)) continue; diff --git a/mm/vmacache.c b/mm/vmacache.c index d9092814c772..01a6e6688ec1 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -24,8 +24,8 @@ * task's vmacache pertains to a different mm (ie, its own). There is * nothing we can do here. * - * Also handle the case where a kernel thread has adopted this mm via use_mm(). - * That kernel thread's vmacache is not applicable to this mm. + * Also handle the case where a kernel thread has adopted this mm via + * kthread_use_mm(). That kernel thread's vmacache is not applicable to this mm. */ static inline bool vmacache_valid_mm(struct mm_struct *mm) { From 37c54f9bd48663f7657a9178fe08c47e4f5b537b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jun 2020 18:42:10 -0700 Subject: [PATCH 16/16] kernel: set USER_DS in kthread_use_mm Some architectures like arm64 and s390 require USER_DS to be set for kernel threads to access user address space, which is the whole purpose of kthread_use_mm, but others like x86 don't. That has led to a huge mess where some callers are fixed up once they are tested on said architectures, while others linger around and yet others like io_uring try to do "clever" optimizations for what usually is just a trivial assignment to a member in the thread_struct for most architectures. Make kthread_use_mm set USER_DS, and kthread_unuse_mm restore to the previous value instead. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Tested-by: Jens Axboe Reviewed-by: Jens Axboe Acked-by: Michael S.
Tsirkin Cc: Alex Deucher Cc: Al Viro Cc: Felipe Balbi Cc: Felix Kuehling Cc: Jason Wang Cc: Zhenyu Wang Cc: Zhi Wang Cc: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20200404094101.672954-7-hch@lst.de Signed-off-by: Linus Torvalds --- drivers/usb/gadget/function/f_fs.c | 4 ---- drivers/vhost/vhost.c | 3 --- fs/io-wq.c | 8 ++------ fs/io_uring.c | 4 ---- kernel/kthread.c | 6 ++++++ 5 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index f80b2747d7c5..490d353d5fde 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -824,13 +824,9 @@ static void ffs_user_copy_worker(struct work_struct *work) bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; if (io_data->read && ret > 0) { - mm_segment_t oldfs = get_fs(); - - set_fs(USER_DS); kthread_use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); kthread_unuse_mm(io_data->mm); - set_fs(oldfs); } io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1ad3d10c121a..421710c53f6a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -329,9 +329,7 @@ static int vhost_worker(void *data) struct vhost_dev *dev = data; struct vhost_work *work, *work_next; struct llist_node *node; - mm_segment_t oldfs = get_fs(); - set_fs(USER_DS); kthread_use_mm(dev->mm); for (;;) { @@ -361,7 +359,6 @@ static int vhost_worker(void *data) } } kthread_unuse_mm(dev->mm); - set_fs(oldfs); return 0; } diff --git a/fs/io-wq.c b/fs/io-wq.c index 748621f7391e..a5e90ac39e4d 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -169,7 +169,6 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) dropped_lock = true; } __set_current_state(TASK_RUNNING); - set_fs(KERNEL_DS); kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; @@ -421,14 +420,11 @@ static void io_wq_switch_mm(struct io_worker 
*worker, struct io_wq_work *work) mmput(worker->mm); worker->mm = NULL; } - if (!work->mm) { - set_fs(KERNEL_DS); + if (!work->mm) return; - } + if (mmget_not_zero(work->mm)) { kthread_use_mm(work->mm); - if (!worker->mm) - set_fs(USER_DS); worker->mm = work->mm; /* hang on to this mm */ work->mm = NULL; diff --git a/fs/io_uring.c b/fs/io_uring.c index ec4e9d36210b..26f7bc941d01 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5989,15 +5989,12 @@ static int io_sq_thread(void *data) { struct io_ring_ctx *ctx = data; const struct cred *old_cred; - mm_segment_t old_fs; DEFINE_WAIT(wait); unsigned long timeout; int ret = 0; complete(&ctx->sq_thread_comp); - old_fs = get_fs(); - set_fs(USER_DS); old_cred = override_creds(ctx->creds); timeout = jiffies + ctx->sq_thread_idle; @@ -6102,7 +6099,6 @@ static int io_sq_thread(void *data) if (current->task_works) task_work_run(); - set_fs(old_fs); io_sq_thread_drop_mm(ctx); revert_creds(old_cred); diff --git a/kernel/kthread.c b/kernel/kthread.c index 8ed4b4fbec7c..86357cd38eb2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -52,6 +52,7 @@ struct kthread { unsigned long flags; unsigned int cpu; void *data; + mm_segment_t oldfs; struct completion parked; struct completion exited; #ifdef CONFIG_BLK_CGROUP @@ -1235,6 +1236,9 @@ void kthread_use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); + + to_kthread(tsk)->oldfs = get_fs(); + set_fs(USER_DS); } EXPORT_SYMBOL_GPL(kthread_use_mm); @@ -1249,6 +1253,8 @@ void kthread_unuse_mm(struct mm_struct *mm) WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(!tsk->mm); + set_fs(to_kthread(tsk)->oldfs); + task_lock(tsk); sync_mm_rss(mm); tsk->mm = NULL;