From 230fa253df6352af12ad0a16128760b5cb3f92df Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 25 Nov 2014 10:01:16 +0100 Subject: [PATCH 1/8] kernel: Provide READ_ONCE and ASSIGN_ONCE ACCESS_ONCE does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145) Let's provide READ_ONCE/ASSIGN_ONCE that will do all accesses via scalar types as suggested by Linus Torvalds. Accesses larger than the machines word size cannot be guaranteed to be atomic. These macros will use memcpy and emit a build warning. Signed-off-by: Christian Borntraeger --- include/linux/compiler.h | 74 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d5ad7b1118fc..a1c81f80978e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -186,6 +186,80 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) #endif +#include + +static __always_inline void data_access_exceeds_word_size(void) +#ifdef __compiletime_warning +__compiletime_warning("data access exceeds word size and won't be atomic") +#endif +; + +static __always_inline void data_access_exceeds_word_size(void) +{ +} + +static __always_inline void __read_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; +#ifdef CONFIG_64BIT + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; +#endif + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + data_access_exceeds_word_size(); + barrier(); + } +} + +static __always_inline void __assign_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8 *)p = *(__u8 *)res; break; + case 2: *(volatile __u16 *)p = *(__u16 *)res; break; + case 4: *(volatile __u32 *)p = *(__u32 *)res; break; +#ifdef CONFIG_64BIT + case 8: *(volatile __u64 *)p = *(__u64 *)res; break; +#endif + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + data_access_exceeds_word_size(); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * ASSIGN_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) + +#define ASSIGN_ONCE(val, x) \ + ({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; }) + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ From e37c698270633327245beb0fbd8699db8a4b65b4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Sun, 7 Dec 2014 21:41:33 +0100 Subject: [PATCH 2/8] mm: replace ACCESS_ONCE with READ_ONCE or barriers ACCESS_ONCE does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145) Let's change the code to access the page table elements with READ_ONCE that does implicit scalar accesses for the gup code. mm_find_pmd is tricky, because m68k and sparc(32bit) define pmd_t as array of longs. This code requires just that the pmd_present and pmd_trans_huge check are done on the same value, so a barrier is sufficent. A similar case is in handle_pte_fault. On ppc44x the word size is 32 bit, but a pte is 64 bit. A barrier is ok as well. Signed-off-by: Christian Borntraeger Cc: linux-mm@kvack.org Acked-by: Paul E. McKenney --- mm/gup.c | 2 +- mm/memory.c | 11 ++++++++++- mm/rmap.c | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index cd62c8c90d4a..f2305deaef50 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -917,7 +917,7 @@ static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end, pudp = pud_offset(pgdp, addr); do { - pud_t pud = ACCESS_ONCE(*pudp); + pud_t pud = READ_ONCE(*pudp); next = pud_addr_end(addr, end); if (pud_none(pud)) diff --git a/mm/memory.c b/mm/memory.c index 3e503831e042..d86aa88902a0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3202,7 +3202,16 @@ static int handle_pte_fault(struct mm_struct *mm, pte_t entry; spinlock_t *ptl; - entry = ACCESS_ONCE(*pte); + /* + * some architectures can have larger ptes than wordsize, + * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y, + * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses. + * The code below just needs a consistent view for the ifs and + * we later double check anyway with the ptl lock held. So here + * a barrier will do. + */ + entry = *pte; + barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { if (vma->vm_ops) { diff --git a/mm/rmap.c b/mm/rmap.c index 19886fb2f13a..1e542744e131 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -581,7 +581,8 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) * without holding anon_vma lock for write. So when looking for a * genuine pmde (in which to find pte), test present and !THP together. */ - pmde = ACCESS_ONCE(*pmd); + pmde = *pmd; + barrier(); if (!pmd_present(pmde) || pmd_trans_huge(pmde)) pmd = NULL; out: From 4f9d1382e6f80dcfa891b2c02d5a35c53be485f1 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 24 Nov 2014 10:53:46 +0100 Subject: [PATCH 3/8] x86/spinlock: Replace ACCESS_ONCE with READ_ONCE ACCESS_ONCE does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145) Change the spinlock code to replace ACCESS_ONCE with READ_ONCE. Signed-off-by: Christian Borntraeger Acked-by: Paul E. McKenney --- arch/x86/include/asm/spinlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 9295016485c9..12a69b406d40 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -92,7 +92,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock) unsigned count = SPIN_THRESHOLD; do { - if (ACCESS_ONCE(lock->tickets.head) == inc.tail) + if (READ_ONCE(lock->tickets.head) == inc.tail) goto out; cpu_relax(); } while (--count); @@ -105,7 +105,7 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) { arch_spinlock_t old, new; - old.tickets = ACCESS_ONCE(lock->tickets); + old.tickets = READ_ONCE(lock->tickets); if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) return 0; @@ -162,14 +162,14 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tmp = READ_ONCE(lock->tickets); return tmp.tail != tmp.head; } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tmp = READ_ONCE(lock->tickets); return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } From 14cf3d977b80f5e355f8ac7547cf1b9ff9fb3e09 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 21 Nov 2014 16:29:40 +0100 Subject: [PATCH 4/8] x86/gup: Replace ACCESS_ONCE with READ_ONCE ACCESS_ONCE does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145) Change the gup code to replace ACCESS_ONCE with READ_ONCE. Signed-off-by: Christian Borntraeger Acked-by: Paul E. McKenney --- arch/x86/mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 207d9aef662d..d7547824e763 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -15,7 +15,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) { #ifndef CONFIG_X86_PAE - return ACCESS_ONCE(*ptep); + return READ_ONCE(*ptep); #else /* * With get_user_pages_fast, we walk down the pagetables without taking From 4218091cb45f601b889cd032e39fe6878a426e70 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 21 Nov 2014 16:21:56 +0100 Subject: [PATCH 5/8] mips/gup: Replace ACCESS_ONCE with READ_ONCE ACCESS_ONCE does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145) Change the gup code to replace ACCESS_ONCE with READ_ONCE. Signed-off-by: Christian Borntraeger Acked-by: Paul E. McKenney --- arch/mips/mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 06ce17c2a905..8aa50e3f3fce 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -30,7 +30,7 @@ retry: return pte; #else - return ACCESS_ONCE(*ptep); + return READ_ONCE(*ptep); #endif } From af2e7aaed1ccf30e61af3e096ac2c7df2f2d6c2a Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 24 Nov 2014 10:53:11 +0100 Subject: [PATCH 6/8] arm64/spinlock: Replace ACCESS_ONCE READ_ONCE ACCESS_ONCE does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145) Change the spinlock code to replace ACCESS_ONCE with READ_ONCE. Signed-off-by: Christian Borntraeger Acked-by: Paul E. McKenney --- arch/arm64/include/asm/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index c45b7b1b7197..cee128732435 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -99,12 +99,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - arch_spinlock_t lockval = ACCESS_ONCE(*lock); + arch_spinlock_t lockval = READ_ONCE(*lock); return (lockval.next - lockval.owner) > 1; } #define arch_spin_is_contended arch_spin_is_contended From 488beef1440e845751365202faace2465840ea98 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 25 Nov 2014 11:44:26 +0100 Subject: [PATCH 7/8] arm/spinlock: Replace ACCESS_ONCE with READ_ONCE ACCESS_ONCE does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145) Change the spinlock code to replace ACCESS_ONCE with READ_ONCE. Signed-off-by: Christian Borntraeger Acked-by: Paul E. McKenney --- arch/arm/include/asm/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index ac4bfae26702..0fa418463f49 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -120,12 +120,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tickets = READ_ONCE(lock->tickets); return (tickets.next - tickets.owner) > 1; } #define arch_spin_is_contended arch_spin_is_contended From 5de72a2247ac05bde7c89039631b3d0c6186fafb Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 25 Nov 2014 13:17:34 +0100 Subject: [PATCH 8/8] s390/kvm: REPLACE barrier fixup with READ_ONCE ACCESS_ONCE does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145) Commit 1365039d0cb3 ("KVM: s390: Fix ipte locking") replace ACCESS_ONCE with barriers. Lets use READ_ONCE instead. Signed-off-by: Christian Borntraeger Acked-by: Paul E. McKenney --- arch/s390/kvm/gaccess.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 6dc0ad9c7050..8f195fa904a1 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -229,12 +229,10 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = *ic; - barrier(); + old = READ_ONCE(*ic); while (old.k) { cond_resched(); - old = *ic; - barrier(); + old = READ_ONCE(*ic); } new = old; new.k = 1; @@ -253,8 +251,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = *ic; - barrier(); + old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); @@ -269,12 +266,10 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = *ic; - barrier(); + old = READ_ONCE(*ic); while (old.kg) { cond_resched(); - old = *ic; - barrier(); + old = READ_ONCE(*ic); } new = old; new.k = 1; @@ -288,8 +283,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = *ic; - barrier(); + old = READ_ONCE(*ic); new = old; new.kh--; if (!new.kh)