f5caf621ee
For inline asm statements which have a CALL instruction, we list the stack pointer as a constraint to convince GCC to ensure the frame pointer is set up first:

    static inline void foo()
    {
            register void *__sp asm(_ASM_SP);
            asm("call bar" : "+r" (__sp))
    }

Unfortunately, that pattern causes Clang to corrupt the stack pointer.

The fix is easy: convert the stack pointer register variable to a global variable.

It should be noted that the end result is different based on the GCC version. With GCC 6.4, this patch has exactly the same result as before:

            defconfig   defconfig-nofp   distro    distro-nofp
    before  9820389     9491555          8816046   8516940
    after   9820389     9491555          8816046   8516940

With GCC 7.2, however, GCC's behavior has changed. It now changes its behavior based on the conversion of the register variable to a global. That somehow convinces it to *always* set up the frame pointer before inserting *any* inline asm. (Therefore, listing the variable as an output constraint is a no-op and is no longer necessary.) It's a bit overkill, but the performance impact should be negligible. And in fact, there's a nice improvement with frame pointers disabled:

            defconfig   defconfig-nofp   distro    distro-nofp
    before  9796316     9468236          9076191   8790305
    after   9796957     9464267          9076381   8785949

So in summary, while listing the stack pointer as an output constraint is no longer necessary for newer versions of GCC, it's still needed for older versions.
Suggested-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3db862e970c432ae823cf515c52b54fec8270e0e.1505942196.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
115 lines
3.0 KiB
C
115 lines
3.0 KiB
C
#ifndef __ASM_PREEMPT_H
|
|
#define __ASM_PREEMPT_H
|
|
|
|
#include <asm/rmwcc.h>
|
|
#include <asm/percpu.h>
|
|
#include <linux/thread_info.h>
|
|
|
|
/*
 * Per-CPU preempt counter word. The PREEMPT_NEED_RESCHED bit is kept folded
 * into this same int, so code that wants the plain count has to mask it off.
 */
DECLARE_PER_CPU(int, __preempt_count);
|
|
|
|
/*
|
|
* We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
|
|
* that a decrement hitting 0 means we can and should reschedule.
|
|
*/
|
|
/* Zero count with the (inverted) need-resched bit set: preemptible, no resched pending. */
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
|
|
|
|
/*
|
|
* We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
|
|
* that think a non-zero value indicates we cannot preempt.
|
|
*/
|
|
static __always_inline int preempt_count(void)
|
|
{
|
|
return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
|
|
}
|
|
|
|
static __always_inline void preempt_count_set(int pc)
|
|
{
|
|
int old, new;
|
|
|
|
do {
|
|
old = raw_cpu_read_4(__preempt_count);
|
|
new = (old & PREEMPT_NEED_RESCHED) |
|
|
(pc & ~PREEMPT_NEED_RESCHED);
|
|
} while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
|
|
}
|
|
|
|
/*
|
|
* must be macros to avoid header recursion hell
|
|
*/
|
|
/* Expands to an empty statement; the argument is not evaluated. */
#define init_task_preempt_count(p) do { } while (0)
|
|
|
|
/*
 * Store PREEMPT_ENABLED into the __preempt_count of the given @cpu.
 * The @p argument is accepted but not used by the expansion.
 */
#define init_idle_preempt_count(p, cpu)					\
do {									\
	per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED;		\
} while (0)
|
|
|
|
/*
|
|
* We fold the NEED_RESCHED bit into the preempt count such that
|
|
* preempt_enable() can decrement and test for needing to reschedule with a
|
|
* single instruction.
|
|
*
|
|
* We invert the actual bit, so that when the decrement hits 0 we know we both
|
|
* need to resched (the bit is cleared) and can resched (no preempt count).
|
|
*/
|
|
|
|
static __always_inline void set_preempt_need_resched(void)
|
|
{
|
|
raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
|
|
}
|
|
|
|
static __always_inline void clear_preempt_need_resched(void)
|
|
{
|
|
raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
|
|
}
|
|
|
|
static __always_inline bool test_preempt_need_resched(void)
|
|
{
|
|
return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
|
|
}
|
|
|
|
/*
|
|
* The various preempt_count add/sub methods
|
|
*/
|
|
|
|
static __always_inline void __preempt_count_add(int val)
|
|
{
|
|
raw_cpu_add_4(__preempt_count, val);
|
|
}
|
|
|
|
/*
 * Subtract @val from the per-CPU __preempt_count word.
 * Implemented as an addition of the negated value.
 */
static __always_inline void __preempt_count_sub(int val)
{
	__preempt_count_add(-val);
}
|
|
|
|
/*
|
|
* Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule
|
|
* a decrement which hits zero means we have no preempt_count and should
|
|
* reschedule.
|
|
*/
|
|
static __always_inline bool __preempt_count_dec_and_test(void)
|
|
{
|
|
GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
|
|
}
|
|
|
|
/*
|
|
* Returns true when we need to resched and can (barring IRQ state).
|
|
*/
|
|
static __always_inline bool should_resched(int preempt_offset)
|
|
{
|
|
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
|
|
}
|
|
|
|
#ifdef CONFIG_PREEMPT
/* Declarations only; none of these functions is defined in this header. */
extern asmlinkage void preempt_schedule(void);
extern asmlinkage void preempt_schedule_notrace(void);

/* Targets of the CALL instructions emitted by the macros below. */
extern asmlinkage void ___preempt_schedule(void);
extern asmlinkage void ___preempt_schedule_notrace(void);

/*
 * Each macro emits a single CALL from inline asm, with
 * ASM_CALL_CONSTRAINT in the output-operand position.
 *
 * NOTE(review): ASM_CALL_CONSTRAINT presumably lists the stack pointer as
 * an output constraint so the compiler sets up the frame pointer before
 * the CALL - confirm against its definition.
 */
# define __preempt_schedule() \
	asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)

# define __preempt_schedule_notrace() \
	asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
#endif
|
|
|
|
#endif /* __ASM_PREEMPT_H */
|