From f9f74dc218c3cfdf0b7f9a95ddae81a081bdb79d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 22 May 2021 17:41:43 -0700 Subject: [PATCH 01/10] mm/shuffle: fix section mismatch warning clang sometimes decides not to inline shuffle_zone(), but it calls a __meminit function. Without the extra __meminit annotation we get this warning: WARNING: modpost: vmlinux.o(.text+0x2a86d4): Section mismatch in reference from the function shuffle_zone() to the function .meminit.text:__shuffle_zone() The function shuffle_zone() references the function __meminit __shuffle_zone(). This is often because shuffle_zone lacks a __meminit annotation or the annotation of __shuffle_zone is wrong. shuffle_free_memory() did not show the same problem in my tests, but it could happen in theory as well, so mark both as __meminit. Link: https://lkml.kernel.org/r/20210514135952.2928094-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: David Hildenbrand Reviewed-by: Nathan Chancellor Cc: Nick Desaulniers Cc: Arnd Bergmann Cc: Wei Yang Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shuffle.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shuffle.h b/mm/shuffle.h index 71b784f0b7c3..cec62984f7d3 100644 --- a/mm/shuffle.h +++ b/mm/shuffle.h @@ -10,7 +10,7 @@ DECLARE_STATIC_KEY_FALSE(page_alloc_shuffle_key); extern void __shuffle_free_memory(pg_data_t *pgdat); extern bool shuffle_pick_tail(void); -static inline void shuffle_free_memory(pg_data_t *pgdat) +static inline void __meminit shuffle_free_memory(pg_data_t *pgdat) { if (!static_branch_unlikely(&page_alloc_shuffle_key)) return; @@ -18,7 +18,7 @@ static inline void shuffle_free_memory(pg_data_t *pgdat) } extern void __shuffle_zone(struct zone *z); -static inline void shuffle_zone(struct zone *z) +static inline void __meminit shuffle_zone(struct zone *z) { if (!static_branch_unlikely(&page_alloc_shuffle_key)) return; From f10628d2f613195132532e0fbda439eeed8d12a2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Sat, 22 May 2021 17:41:46 -0700 Subject: [PATCH 02/10] Revert "mm/gup: check page posion status for coredump." While reviewing [1] I came across commit d3378e86d182 ("mm/gup: check page posion status for coredump.") and noticed that this patch is broken in two ways. First it doesn't really prevent hwpoison pages from being dumped because hwpoison pages can be marked asynchornously at any time after the check. Secondly, and more importantly, the patch introduces a ref count leak because get_dump_page takes a reference on the page which is not released. It also seems that the patch was merged incorrectly because there were follow up changes not included as well as discussions on how to address the underlying problem [2] Therefore revert the original patch. Link: http://lkml.kernel.org/r/20210429122519.15183-4-david@redhat.com [1] Link: http://lkml.kernel.org/r/57ac524c-b49a-99ec-c1e4-ef5027bfb61b@redhat.com [2] Link: https://lkml.kernel.org/r/20210505135407.31590-1-mhocko@kernel.org Fixes: d3378e86d182 ("mm/gup: check page posion status for coredump.") Signed-off-by: Michal Hocko Reviewed-by: David Hildenbrand Cc: Aili Yao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 4 ---- mm/internal.h | 20 -------------------- 2 files changed, 24 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 0697134b6a12..3ded6a5f26b2 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1593,10 +1593,6 @@ struct page *get_dump_page(unsigned long addr) FOLL_FORCE | FOLL_DUMP | FOLL_GET); if (locked) mmap_read_unlock(mm); - - if (ret == 1 && is_page_poisoned(page)) - return NULL; - return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */ diff --git a/mm/internal.h b/mm/internal.h index 54bd0dc2c23c..2f1182948aa6 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -96,26 +96,6 @@ static inline void set_page_refcounted(struct page *page) set_page_count(page, 1); } -/* - * When kernel touch the user page, the user page may be have been marked - * poison but still mapped in user space, if without this page, the kernel - * can guarantee the data integrity and operation success, the kernel is - * better to check the posion status and avoid touching it, be good not to - * panic, coredump for process fatal signal is a sample case matching this - * scenario. Or if kernel can't guarantee the data integrity, it's better - * not to call this function, let kernel touch the poison page and get to - * panic. - */ -static inline bool is_page_poisoned(struct page *page) -{ - if (PageHWPoison(page)) - return true; - else if (PageHuge(page) && PageHWPoison(compound_head(page))) - return true; - - return false; -} - extern unsigned long highest_memmap_pfn; /* From a11ddb37bf367e6b5239b95ca759e5389bb46048 Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Sat, 22 May 2021 17:41:49 -0700 Subject: [PATCH 03/10] ipc/mqueue, msg, sem: avoid relying on a stack reference past its expiry do_mq_timedreceive calls wq_sleep with a stack local address. The sender (do_mq_timedsend) uses this address to later call pipelined_send. This leads to a very hard to trigger race where a do_mq_timedreceive call might return and leave do_mq_timedsend to rely on an invalid address, causing the following crash: RIP: 0010:wake_q_add_safe+0x13/0x60 Call Trace: __x64_sys_mq_timedsend+0x2a9/0x490 do_syscall_64+0x80/0x680 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f5928e40343 The race occurs as: 1. do_mq_timedreceive calls wq_sleep with the address of `struct ext_wait_queue` on function stack (aliased as `ewq_addr` here) - it holds a valid `struct ext_wait_queue *` as long as the stack has not been overwritten. 2. `ewq_addr` gets added to info->e_wait_q[RECV].list in wq_add, and do_mq_timedsend receives it via wq_get_first_waiter(info, RECV) to call __pipelined_op. 3. Sender calls __pipelined_op::smp_store_release(&this->state, STATE_READY). Here is where the race window begins. (`this` is `ewq_addr`.) 4. If the receiver wakes up now in do_mq_timedreceive::wq_sleep, it will see `state == STATE_READY` and break. 5. do_mq_timedreceive returns, and `ewq_addr` is no longer guaranteed to be a `struct ext_wait_queue *` since it was on do_mq_timedreceive's stack. (Although the address may not get overwritten until another function happens to touch it, which means it can persist around for an indefinite time.) 6. do_mq_timedsend::__pipelined_op() still believes `ewq_addr` is a `struct ext_wait_queue *`, and uses it to find a task_struct to pass to the wake_q_add_safe call. In the lucky case where nothing has overwritten `ewq_addr` yet, `ewq_addr->task` is the right task_struct. In the unlucky case, __pipelined_op::wake_q_add_safe gets handed a bogus address as the receiver's task_struct causing the crash. do_mq_timedsend::__pipelined_op() should not dereference `this` after setting STATE_READY, as the receiver counterpart is now free to return. Change __pipelined_op to call wake_q_add_safe on the receiver's task_struct returned by get_task_struct, instead of dereferencing `this` which sits on the receiver's stack. As Manfred pointed out, the race potentially also exists in ipc/msg.c::expunge_all and ipc/sem.c::wake_up_sem_queue_prepare. Fix those in the same way. Link: https://lkml.kernel.org/r/20210510102950.12551-1-varad.gautam@suse.com Fixes: c5b2cbdbdac563 ("ipc/mqueue.c: update/document memory barriers") Fixes: 8116b54e7e23ef ("ipc/sem.c: document and update memory barriers") Fixes: 0d97a82ba830d8 ("ipc/msg.c: update and document memory barriers") Signed-off-by: Varad Gautam Reported-by: Matthias von Faber Acked-by: Davidlohr Bueso Acked-by: Manfred Spraul Cc: Christian Brauner Cc: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/mqueue.c | 6 ++++-- ipc/msg.c | 6 ++++-- ipc/sem.c | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8031464ed4ae..4e4e61111500 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1004,12 +1004,14 @@ static inline void __pipelined_op(struct wake_q_head *wake_q, struct mqueue_inode_info *info, struct ext_wait_queue *this) { + struct task_struct *task; + list_del(&this->list); - get_task_struct(this->task); + task = get_task_struct(this->task); /* see MQ_BARRIER for purpose/pairing */ smp_store_release(&this->state, STATE_READY); - wake_q_add_safe(wake_q, this->task); + wake_q_add_safe(wake_q, task); } /* pipelined_send() - send a message directly to the task waiting in diff --git a/ipc/msg.c b/ipc/msg.c index acd1bc7af55a..6e6c8e0c9380 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -251,11 +251,13 @@ static void expunge_all(struct msg_queue *msq, int res, struct msg_receiver *msr, *t; list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { - get_task_struct(msr->r_tsk); + struct task_struct *r_tsk; + + r_tsk = get_task_struct(msr->r_tsk); /* see MSG_BARRIER for purpose/pairing */ smp_store_release(&msr->r_msg, ERR_PTR(res)); - wake_q_add_safe(wake_q, msr->r_tsk); + wake_q_add_safe(wake_q, r_tsk); } } diff --git a/ipc/sem.c b/ipc/sem.c index e0ec239680cb..bf534c74293e 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -784,12 +784,14 @@ would_block: static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, struct wake_q_head *wake_q) { - get_task_struct(q->sleeper); + struct task_struct *sleeper; + + sleeper = get_task_struct(q->sleeper); /* see SEM_BARRIER_2 for purpose/pairing */ smp_store_release(&q->status, error); - wake_q_add_safe(wake_q, q->sleeper); + wake_q_add_safe(wake_q, sleeper); } static void unlink_queue(struct sem_array *sma, struct sem_queue *q) From 4d1cd3b2c5c1c32826454de3a18c6183238d47ed Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 22 May 2021 17:41:53 -0700 Subject: [PATCH 04/10] tools/testing/selftests/exec: fix link error Fix the link error by adding '-static': gcc -Wall -Wl,-z,max-page-size=0x1000 -pie load_address.c -o /home/yang/linux/tools/testing/selftests/exec/load_address_4096 /usr/bin/ld: /tmp/ccopEGun.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `stderr@@GLIBC_2.17' which may bind externally can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: /tmp/ccopEGun.o(.text+0x158): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `stderr@@GLIBC_2.17' /usr/bin/ld: final link failed: bad value collect2: error: ld returned 1 exit status make: *** [Makefile:25: tools/testing/selftests/exec/load_address_4096] Error 1 Link: https://lkml.kernel.org/r/20210514092422.2367367-1-yangyingliang@huawei.com Fixes: 206e22f01941 ("tools/testing/selftests: add self-test for verifying load alignment") Signed-off-by: Yang Yingliang Cc: Chris Kennelly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/exec/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index cf69b2fcce59..dd61118df66e 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -28,8 +28,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ $(OUTPUT)/load_address_4096: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@ $(OUTPUT)/load_address_2097152: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@ $(OUTPUT)/load_address_16777216: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@ From f70b00496f2a0669fdb19a783e613bdbdedcf901 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Sat, 22 May 2021 17:41:56 -0700 Subject: [PATCH 05/10] kasan: slab: always reset the tag in get_freepointer_safe() With CONFIG_DEBUG_PAGEALLOC enabled, the kernel should also untag the object pointer, as done in get_freepointer(). Failing to do so reportedly leads to SLUB freelist corruptions that manifest as boot-time crashes. Link: https://lkml.kernel.org/r/20210514072228.534418-1-glider@google.com Signed-off-by: Alexander Potapenko Cc: Marco Elver Cc: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Andrey Konovalov Cc: Elliot Berman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/slub.c b/mm/slub.c index 438fa8d4c970..3f96e099817a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -301,6 +301,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) if (!debug_pagealloc_enabled_static()) return get_freepointer(s, object); + object = kasan_reset_tag(object); freepointer_addr = (unsigned long)object + s->offset; copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); return freelist_ptr(s, p, freepointer_addr); From 0f90b88dbcd1143e0f408502eba0af97429c502a Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Sat, 22 May 2021 17:41:59 -0700 Subject: [PATCH 06/10] watchdog: reliable handling of timestamps Commit 9bf3bc949f8a ("watchdog: cleanup handling of false positives") tried to handle a virtual host stopped by the host a more straightforward and cleaner way. But it introduced a risk of false softlockup reports. The virtual host might be stopped at any time, for example between kvm_check_and_clear_guest_paused() and is_softlockup(). As a result, is_softlockup() might read the updated jiffies and detects a softlockup. A solution might be to put back kvm_check_and_clear_guest_paused() after is_softlockup() and detect it. But it would put back the cycle that complicates the logic. In fact, the handling of all the timestamps is not reliable. The code does not guarantee when and how many times the timestamps are read. For example, "period_ts" might be touched anytime also from NMI and re-read in is_softlockup(). It works just by chance. Fix all the problems by making the code even more explicit. 1. Make sure that "now" and "period_ts" timestamps are read only once. They might be changed at anytime by NMI or when the virtual guest is stopped by the host. Note that "now" timestamp does this implicitly because "jiffies" is marked volatile. 2. "now" time must be read first. The state of "period_ts" will decide whether it will be used or the period will get restarted. 3. kvm_check_and_clear_guest_paused() must be called before reading "period_ts". It touches the variable when the guest was stopped. As a result, "now" timestamp is used only when the watchdog was not touched and the guest not stopped in the meantime. "period_ts" is restarted in all other situations. Link: https://lkml.kernel.org/r/YKT55gw+RZfyoFf7@alley Fixes: 9bf3bc949f8aeefeacea4b ("watchdog: cleanup handling of false positives") Signed-off-by: Petr Mladek Reported-by: Sergey Senozhatsky Reviewed-by: Sergey Senozhatsky Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7c397907d0e9..92d3bcc5a5e0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -302,10 +302,10 @@ void touch_softlockup_watchdog_sync(void) __this_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT); } -static int is_softlockup(unsigned long touch_ts, unsigned long period_ts) +static int is_softlockup(unsigned long touch_ts, + unsigned long period_ts, + unsigned long now) { - unsigned long now = get_timestamp(); - if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){ /* Warn about unreasonable delays. */ if (time_after(now, period_ts + get_softlockup_thresh())) @@ -353,8 +353,7 @@ static int softlockup_fn(void *data) /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { - unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); - unsigned long period_ts = __this_cpu_read(watchdog_report_ts); + unsigned long touch_ts, period_ts, now; struct pt_regs *regs = get_irq_regs(); int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; @@ -376,12 +375,23 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); + /* + * Read the current timestamp first. It might become invalid anytime + * when a virtual machine is stopped by the host or when the watchog + * is touched from NMI. + */ + now = get_timestamp(); /* * If a virtual machine is stopped by the host it can look to - * the watchdog like a soft lockup. Check to see if the host - * stopped the vm before we process the timestamps. + * the watchdog like a soft lockup. This function touches the watchdog. */ kvm_check_and_clear_guest_paused(); + /* + * The stored timestamp is comparable with @now only when not touched. + * It might get touched anytime from NMI. Make sure that is_softlockup() + * uses the same (valid) value. + */ + period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts)); /* Reset the interval when touched by known problematic code. */ if (period_ts == SOFTLOCKUP_DELAY_REPORT) { @@ -398,13 +408,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) return HRTIMER_RESTART; } - /* check for a softlockup - * This is done by making sure a high priority task is - * being scheduled. The task touches the watchdog to - * indicate it is getting cpu time. If it hasn't then - * this is a good indication some task is hogging the cpu - */ - duration = is_softlockup(touch_ts, period_ts); + /* Check for a softlockup. */ + touch_ts = __this_cpu_read(watchdog_touch_ts); + duration = is_softlockup(touch_ts, period_ts, now); if (unlikely(duration)) { /* * Prevent multiple soft-lockup reports if one cpu is already From f747e6667ebb2ffb8133486c9cd19800d72b0d98 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sat, 22 May 2021 17:42:02 -0700 Subject: [PATCH 07/10] linux/bits.h: fix compilation error with GENMASK GENMASK() has an input check which uses __builtin_choose_expr() to enable a compile time sanity check of its inputs if they are known at compile time. However, it turns out that __builtin_constant_p() does not always return a compile time constant [0]. It was thought this problem was fixed with gcc 4.9 [1], but apparently this is not the case [2]. Switch to use __is_constexpr() instead which always returns a compile time constant, regardless of its inputs. Link: https://lore.kernel.org/lkml/42b4342b-aefc-a16a-0d43-9f9c0d63ba7a@rasmusvillemoes.dk [0] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19449 [1] Link: https://lore.kernel.org/lkml/1ac7bbc2-45d9-26ed-0b33-bf382b8d858b@I-love.SAKURA.ne.jp [2] Link: https://lkml.kernel.org/r/20210511203716.117010-1-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Reported-by: Tetsuo Handa Acked-by: Arnd Bergmann Reviewed-by: Andy Shevchenko Cc: Ard Biesheuvel Cc: Yury Norov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bits.h | 2 +- include/linux/const.h | 8 ++++++++ include/linux/minmax.h | 10 ++-------- tools/include/linux/bits.h | 2 +- tools/include/linux/const.h | 8 ++++++++ 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/linux/bits.h b/include/linux/bits.h index 7f475d59a097..87d112650dfb 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -22,7 +22,7 @@ #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/include/linux/const.h b/include/linux/const.h index 81b8aae5a855..435ddd72d2c4 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -3,4 +3,12 @@ #include +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #endif /* _LINUX_CONST_H */ diff --git a/include/linux/minmax.h b/include/linux/minmax.h index c0f57b0c64d9..5433c08fcc68 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,6 +2,8 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include + /* * min()/max()/clamp() macros must accomplish three things: * @@ -17,14 +19,6 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - #define __no_side_effects(x, y) \ (__is_constexpr(x) && __is_constexpr(y)) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7f475d59a097..87d112650dfb 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -22,7 +22,7 @@ #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h index 81b8aae5a855..435ddd72d2c4 100644 --- a/tools/include/linux/const.h +++ b/tools/include/linux/const.h @@ -3,4 +3,12 @@ #include +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #endif /* _LINUX_CONST_H */ From 43b2ec977ce33b53e2be30999824b584e2be248a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 22 May 2021 17:42:05 -0700 Subject: [PATCH 08/10] proc: remove Alexey from MAINTAINERS People Cc me and I don't have time. Link: https://lkml.kernel.org/r/YKarMxHJBIhMHQIh@localhost.localdomain Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index def1640cc294..81e1edeceae4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14735,7 +14735,6 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/p54 F: drivers/net/wireless/intersil/prism54/ PROC FILESYSTEM -R: Alexey Dobriyan L: linux-kernel@vger.kernel.org L: linux-fsdevel@vger.kernel.org S: Maintained From 1b6d63938a9d868df01d5bc6e2da212133121b8d Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 22 May 2021 17:42:08 -0700 Subject: [PATCH 09/10] lib: kunit: suppress a compilation warning of frame size lib/bitfield_kunit.c: In function `test_bitfields_constants': lib/bitfield_kunit.c:93:1: warning: the frame size of 7456 bytes is larger than 2048 bytes [-Wframe-larger-than=] } ^ As the description of BITFIELD_KUNIT in lib/Kconfig.debug, it "Only useful for kernel devs running the KUnit test harness, and not intended for inclusion into a production build". Therefore, it is not worth modifying variable 'test_bitfields_constants' to clear this warning. Just suppress it. Link: https://lkml.kernel.org/r/20210518094533.7652-1-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Cc: Shuah Khan Cc: Vitor Massaru Iha Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Makefile b/lib/Makefile index e11cfc18b6c0..2cc359ec1fdd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -348,6 +348,7 @@ obj-$(CONFIG_OBJAGG) += objagg.o obj-$(CONFIG_PLDMFW) += pldmfw/ # KUnit tests +CFLAGS_bitfield_kunit.o := $(call cc-option,-Wframe-larger-than=10240) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o From e32905e57358fdfb82f9de024534f205b3af7dac Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Sat, 22 May 2021 17:42:11 -0700 Subject: [PATCH 10/10] userfaultfd: hugetlbfs: fix new flag usage in error path In commit d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags") the use of PagePrivate to indicate a reservation count should be restored at free time was changed to the hugetlb specific flag HPageRestoreReserve. Changes to a userfaultfd error path as well as a VM_BUG_ON() in remove_inode_hugepages() were overlooked. Users could see incorrect hugetlb reserve counts if they experience an error with a UFFDIO_COPY operation. Specifically, this would be the result of an unlikely copy_huge_page_from_user error. There is not an increased chance of hitting the VM_BUG_ON. Link: https://lkml.kernel.org/r/20210521233952.236434-1-mike.kravetz@oracle.com Fixes: d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags") Signed-off-by: Mike Kravetz Reviewed-by: Mina Almasry Cc: Oscar Salvador Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- mm/userfaultfd.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9d9e0097c1d3..55efd3dd04f6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -529,7 +529,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, * the subpool and global reserve usage count can need * to be adjusted. */ - VM_BUG_ON(PagePrivate(page)); + VM_BUG_ON(HPageRestoreReserve(page)); remove_huge_page(page); freed++; if (!truncate_op) { diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index e14b3820c6a8..63a73e164d55 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -360,38 +360,38 @@ out: * If a reservation for the page existed in the reservation * map of a private mapping, the map was modified to indicate * the reservation was consumed when the page was allocated. - * We clear the PagePrivate flag now so that the global + * We clear the HPageRestoreReserve flag now so that the global * reserve count will not be incremented in free_huge_page. * The reservation map will still indicate the reservation * was consumed and possibly prevent later page allocation. * This is better than leaking a global reservation. If no - * reservation existed, it is still safe to clear PagePrivate - * as no adjustments to reservation counts were made during - * allocation. + * reservation existed, it is still safe to clear + * HPageRestoreReserve as no adjustments to reservation counts + * were made during allocation. * * The reservation map for shared mappings indicates which * pages have reservations. When a huge page is allocated * for an address with a reservation, no change is made to - * the reserve map. In this case PagePrivate will be set - * to indicate that the global reservation count should be + * the reserve map. In this case HPageRestoreReserve will be + * set to indicate that the global reservation count should be * incremented when the page is freed. This is the desired * behavior. However, when a huge page is allocated for an * address without a reservation a reservation entry is added - * to the reservation map, and PagePrivate will not be set. - * When the page is freed, the global reserve count will NOT - * be incremented and it will appear as though we have leaked - * reserved page. In this case, set PagePrivate so that the - * global reserve count will be incremented to match the - * reservation map entry which was created. + * to the reservation map, and HPageRestoreReserve will not be + * set. When the page is freed, the global reserve count will + * NOT be incremented and it will appear as though we have + * leaked reserved page. In this case, set HPageRestoreReserve + * so that the global reserve count will be incremented to + * match the reservation map entry which was created. * * Note that vm_alloc_shared is based on the flags of the vma * for which the page was originally allocated. dst_vma could * be different or NULL on error. */ if (vm_alloc_shared) - SetPagePrivate(page); + SetHPageRestoreReserve(page); else - ClearPagePrivate(page); + ClearHPageRestoreReserve(page); put_page(page); } BUG_ON(copied < 0);