From 8b93b4a9e1be78930eb9d640f75818993f70e065 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 24 May 2017 17:55:09 -0600 Subject: [PATCH 1/7] arch/sparc: Remove the check #ifndef __LINUX_SPINLOCK_TYPES_H Saw these compile errors on SPARC when queued rwlock feature is enabled. CC kernel/locking/qrwlock.o In file included from ./include/asm-generic/qrwlock_types.h:5, from ./arch/sparc/include/asm/qrwlock.h:4, from kernel/locking/qrwlock.c:24: ./arch/sparc/include/asm/spinlock_types.h:5:3: error: #error "please don't include this file directly" SPARC has this guard which causes compile error when spinlock_types.h is included directly. @ifndef __LINUX_SPINLOCK_TYPES_H @ error "please don't include this file directly" @endif Remove this un-necessary "ifndef __LINUX_SPINLOCK_TYPES_H" stanza from SPARC. Signed-off-by: Babu Moger Suggested-by: David Miller Signed-off-by: David S. Miller --- arch/sparc/include/asm/spinlock_types.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h index 9c454fdeaad8..019c0855bf8b 100644 --- a/arch/sparc/include/asm/spinlock_types.h +++ b/arch/sparc/include/asm/spinlock_types.h @@ -1,10 +1,6 @@ #ifndef __SPARC_SPINLOCK_TYPES_H #define __SPARC_SPINLOCK_TYPES_H -#ifndef __LINUX_SPINLOCK_TYPES_H -# error "please don't include this file directly" -#endif - typedef struct { volatile unsigned char lock; } arch_spinlock_t; From 9ab6055f959032258c0f83a070cd0d26ed7a8fc5 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 24 May 2017 17:55:10 -0600 Subject: [PATCH 2/7] kernel/locking: Fix compile error with qrwlock.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Saw these compile errors on SPARC when queued rwlock feature is enabled. CC kernel/locking/qrwlock.o kernel/locking/qrwlock.c: In function ‘queued_read_lock_slowpath’: kernel/locking/qrwlock.c:89: error: implicit declaration of function ‘arch_spin_lock’ kernel/locking/qrwlock.c:102: error: implicit declaration of function ‘arch_spin_unlock’ make[4]: *** [kernel/locking/qrwlock.o] Error 1 Include spinlock.h in qrwlock.c to fix it. Signed-off-by: Babu Moger Reviewed-by: Håkon Bugge Reviewed-by: Jane Chu Reviewed-by: Shannon Nelson Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- kernel/locking/qrwlock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index cc3ed0ccdfa2..2655f26ec882 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* From 97d9f969161d79e6a4bba247e67ce731ff861f79 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 24 May 2017 17:55:11 -0600 Subject: [PATCH 3/7] arch/sparc: Define config parameter CPU_BIG_ENDIAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found this problem while enabling queued rwlock on SPARC. The parameter CONFIG_CPU_BIG_ENDIAN is used to clear the specific byte in qrwlock structure. Without this parameter, we clear the wrong byte. Here is the code. static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) { return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } Define CPU_BIG_ENDIAN for SPARC to fix it. Signed-off-by: Babu Moger Reviewed-by: Håkon Bugge Reviewed-by: Jane Chu Reviewed-by: Shannon Nelson Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 58243b0d21c0..eb213b5000c8 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config ARCH_DEFCONFIG config ARCH_PROC_KCORE_TEXT def_bool y +config CPU_BIG_ENDIAN + def_bool y + config ARCH_ATU bool default y if SPARC64 From a12ee2349312d7112b9b7c6ac2e70c5ec2ca334e Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 24 May 2017 17:55:12 -0600 Subject: [PATCH 4/7] arch/sparc: Introduce cmpxchg_u8 SPARC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SPARC supports 32 bit and 64 bit cmpxchg right now. Add support for 8 bit (1 byte) cmpxchg. This is required to support queued rwlocks feature which uses 1 byte cmpxchg. The function __cmpxchg_u8 here uses the 4 byte cas instruction with a byte manipulation to achieve 1 byte cmpxchg. Signed-off-by: Babu Moger Reviewed-by: Håkon Bugge Reviewed-by: Steve Sistare Reviewed-by: Shannon Nelson Reviewed-by: Jane Chu Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- arch/sparc/include/asm/cmpxchg_64.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index faa2f61058c2..000f7d7a2d4a 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -87,6 +87,33 @@ __cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new) return new; } +/* + * Use 4 byte cas instruction to achieve 1 byte cmpxchg. Main logic + * here is to get the bit shift of the byte we are interested in. + * The XOR is handy for reversing the bits for big-endian byte order + */ +static inline unsigned long +__cmpxchg_u8(volatile unsigned char *m, unsigned char old, unsigned char new) +{ + unsigned long maddr = (unsigned long)m; + int bit_shift = (((unsigned long)m & 3) ^ 3) << 3; + unsigned int mask = 0xff << bit_shift; + unsigned int *ptr = (unsigned int *) (maddr & ~3); + unsigned int old32, new32, load; + unsigned int load32 = *ptr; + + do { + new32 = (load32 & ~mask) | (new << bit_shift); + old32 = (load32 & ~mask) | (old << bit_shift); + load32 = __cmpxchg_u32(ptr, old32, new32); + if (load32 == old32) + return old; + load = (load32 & mask) >> bit_shift; + } while (load == old); + + return load; +} + /* This function doesn't exist, so you'll get a linker error if something tries to do an invalid cmpxchg(). */ void __cmpxchg_called_with_bad_pointer(void); @@ -95,6 +122,8 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) { switch (size) { + case 1: + return __cmpxchg_u8(ptr, old, new); case 4: return __cmpxchg_u32(ptr, old, new); case 8: From a37594f198363fd9321ece54440336fd4b2a9c8e Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 24 May 2017 17:55:13 -0600 Subject: [PATCH 5/7] arch/sparc: Enable queued rwlocks for SPARC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable queued rwlocks for SPARC. Here are the discussions on this feature when this was introduced. https://lwn.net/Articles/572765/ https://lwn.net/Articles/582200/ Cleaned-up the arch_read_xxx and arch_write_xxx definitions in spinlock_64.h. These routines are replaced by the functions in include/asm-generic/qrwlock.h Signed-off-by: Babu Moger Reviewed-by: Håkon Bugge Reviewed-by: Jane Chu Reviewed-by: Shannon Nelson Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/qrwlock.h | 7 ++ arch/sparc/include/asm/spinlock_64.h | 124 +----------------------- arch/sparc/include/asm/spinlock_types.h | 5 +- 4 files changed, 13 insertions(+), 124 deletions(-) create mode 100644 arch/sparc/include/asm/qrwlock.h diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index eb213b5000c8..0e8065207596 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -83,6 +83,7 @@ config SPARC64 select ARCH_SUPPORTS_ATOMIC_RMW select HAVE_NMI select HAVE_REGS_AND_STACK_ACCESS_API + select ARCH_USE_QUEUED_RWLOCKS config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/qrwlock.h b/arch/sparc/include/asm/qrwlock.h new file mode 100644 index 000000000000..d68a4b102100 --- /dev/null +++ b/arch/sparc/include/asm/qrwlock.h @@ -0,0 +1,7 @@ +#ifndef _ASM_SPARC_QRWLOCK_H +#define _ASM_SPARC_QRWLOCK_H + +#include +#include + +#endif /* _ASM_SPARC_QRWLOCK_H */ diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 07c9f2e9bf57..8901c2d4ada9 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -10,6 +10,7 @@ #include #include +#include /* To get debugging spinlocks which detect and catch * deadlock situations, set CONFIG_DEBUG_SPINLOCK @@ -94,132 +95,9 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla : "memory"); } -/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ - -static inline void arch_read_lock(arch_rwlock_t *lock) -{ - unsigned long tmp1, tmp2; - - __asm__ __volatile__ ( -"1: ldsw [%2], %0\n" -" brlz,pn %0, 2f\n" -"4: add %0, 1, %1\n" -" cas [%2], %0, %1\n" -" cmp %0, %1\n" -" bne,pn %%icc, 1b\n" -" nop\n" -" .subsection 2\n" -"2: ldsw [%2], %0\n" -" brlz,pt %0, 2b\n" -" nop\n" -" ba,a,pt %%xcc, 4b\n" -" .previous" - : "=&r" (tmp1), "=&r" (tmp2) - : "r" (lock) - : "memory"); -} - -static inline int arch_read_trylock(arch_rwlock_t *lock) -{ - int tmp1, tmp2; - - __asm__ __volatile__ ( -"1: ldsw [%2], %0\n" -" brlz,a,pn %0, 2f\n" -" mov 0, %0\n" -" add %0, 1, %1\n" -" cas [%2], %0, %1\n" -" cmp %0, %1\n" -" bne,pn %%icc, 1b\n" -" mov 1, %0\n" -"2:" - : "=&r" (tmp1), "=&r" (tmp2) - : "r" (lock) - : "memory"); - - return tmp1; -} - -static inline void arch_read_unlock(arch_rwlock_t *lock) -{ - unsigned long tmp1, tmp2; - - __asm__ __volatile__( -"1: lduw [%2], %0\n" -" sub %0, 1, %1\n" -" cas [%2], %0, %1\n" -" cmp %0, %1\n" -" bne,pn %%xcc, 1b\n" -" nop" - : "=&r" (tmp1), "=&r" (tmp2) - : "r" (lock) - : "memory"); -} - -static inline void arch_write_lock(arch_rwlock_t *lock) -{ - unsigned long mask, tmp1, tmp2; - - mask = 0x80000000UL; - - __asm__ __volatile__( -"1: lduw [%2], %0\n" -" brnz,pn %0, 2f\n" -"4: or %0, %3, %1\n" -" cas [%2], %0, %1\n" -" cmp %0, %1\n" -" bne,pn %%icc, 1b\n" -" nop\n" -" .subsection 2\n" -"2: lduw [%2], %0\n" -" brnz,pt %0, 2b\n" -" nop\n" -" ba,a,pt %%xcc, 4b\n" -" .previous" - : "=&r" (tmp1), "=&r" (tmp2) - : "r" (lock), "r" (mask) - : "memory"); -} - -static inline void arch_write_unlock(arch_rwlock_t *lock) -{ - __asm__ __volatile__( -" stw %%g0, [%0]" - : /* no outputs */ - : "r" (lock) - : "memory"); -} - -static inline int arch_write_trylock(arch_rwlock_t *lock) -{ - unsigned long mask, tmp1, tmp2, result; - - mask = 0x80000000UL; - - __asm__ __volatile__( -" mov 0, %2\n" -"1: lduw [%3], %0\n" -" brnz,pn %0, 2f\n" -" or %0, %4, %1\n" -" cas [%3], %0, %1\n" -" cmp %0, %1\n" -" bne,pn %%icc, 1b\n" -" nop\n" -" mov 1, %2\n" -"2:" - : "=&r" (tmp1), "=&r" (tmp2), "=&r" (result) - : "r" (lock), "r" (mask) - : "memory"); - - return result; -} - #define arch_read_lock_flags(p, f) arch_read_lock(p) #define arch_write_lock_flags(p, f) arch_write_lock(p) -#define arch_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) -#define arch_write_can_lock(rw) (!(rw)->lock) - #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h index 019c0855bf8b..64fce21b23df 100644 --- a/arch/sparc/include/asm/spinlock_types.h +++ b/arch/sparc/include/asm/spinlock_types.h @@ -7,10 +7,13 @@ typedef struct { #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#ifdef CONFIG_QUEUED_RWLOCKS +#include +#else typedef struct { volatile unsigned int lock; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } - +#endif /* CONFIG_QUEUED_RWLOCKS */ #endif From 79d39e2bab60d18a68a5abc00be4506864397efc Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 24 May 2017 17:55:14 -0600 Subject: [PATCH 6/7] arch/sparc: Introduce xchg16 for SPARC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SPARC supports 32 bit and 64 bit xchg right now. Add the support for 16 bit (2 byte) xchg. This is required to support queued spinlock feature which uses 2 byte xchg. This is achieved using 4 byte cas instructions with byte manipulations. Also re-arranged the code to call __cmpxchg_u32 inside xchg16. Signed-off-by: Babu Moger Reviewed-by: Håkon Bugge Reviewed-by: Steven Sistare Reviewed-by: Shannon Nelson Reviewed-by: Jane Chu Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- arch/sparc/include/asm/cmpxchg_64.h | 49 +++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index 000f7d7a2d4a..4028f4f1e561 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -6,6 +6,17 @@ #ifndef __ARCH_SPARC64_CMPXCHG__ #define __ARCH_SPARC64_CMPXCHG__ +static inline unsigned long +__cmpxchg_u32(volatile int *m, int old, int new) +{ + __asm__ __volatile__("cas [%2], %3, %0" + : "=&r" (new) + : "0" (new), "r" (m), "r" (old) + : "memory"); + + return new; +} + static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val) { unsigned long tmp1, tmp2; @@ -44,10 +55,38 @@ static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long void __xchg_called_with_bad_pointer(void); +/* + * Use 4 byte cas instruction to achieve 2 byte xchg. Main logic + * here is to get the bit shift of the byte we are interested in. + * The XOR is handy for reversing the bits for big-endian byte order. + */ +static inline unsigned long +xchg16(__volatile__ unsigned short *m, unsigned short val) +{ + unsigned long maddr = (unsigned long)m; + int bit_shift = (((unsigned long)m & 2) ^ 2) << 3; + unsigned int mask = 0xffff << bit_shift; + unsigned int *ptr = (unsigned int *) (maddr & ~2); + unsigned int old32, new32, load32; + + /* Read the old value */ + load32 = *ptr; + + do { + old32 = load32; + new32 = (load32 & (~mask)) | val << bit_shift; + load32 = __cmpxchg_u32(ptr, old32, new32); + } while (load32 != old32); + + return (load32 & mask) >> bit_shift; +} + static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int size) { switch (size) { + case 2: + return xchg16(ptr, x); case 4: return xchg32(ptr, x); case 8: @@ -65,16 +104,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, #include -static inline unsigned long -__cmpxchg_u32(volatile int *m, int old, int new) -{ - __asm__ __volatile__("cas [%2], %3, %0" - : "=&r" (new) - : "0" (new), "r" (m), "r" (old) - : "memory"); - - return new; -} static inline unsigned long __cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new) From 145d978585977438ebb55079487827006c604e39 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 24 May 2017 17:55:15 -0600 Subject: [PATCH 7/7] arch/sparc: Enable queued spinlock support for SPARC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch makes the necessary changes in SPARC architecture to enable queued spinlock support. Here are some of the earlier discussions about this feature. https://lwn.net/Articles/561775/ https://lwn.net/Articles/590243/ Cleaned-up the spinlock_64.h. The definitions of arch_spin_xxx are replaced by the function in Signed-off-by: Babu Moger Reviewed-by: Håkon Bugge Reviewed-by: Jane Chu Reviewed-by: Shannon Nelson Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/qspinlock.h | 7 +++ arch/sparc/include/asm/spinlock_64.h | 84 +------------------------ arch/sparc/include/asm/spinlock_types.h | 5 ++ 4 files changed, 14 insertions(+), 83 deletions(-) create mode 100644 arch/sparc/include/asm/qspinlock.h diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 0e8065207596..78af684f63b9 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -84,6 +84,7 @@ config SPARC64 select HAVE_NMI select HAVE_REGS_AND_STACK_ACCESS_API select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/qspinlock.h b/arch/sparc/include/asm/qspinlock.h new file mode 100644 index 000000000000..5ae9a2802846 --- /dev/null +++ b/arch/sparc/include/asm/qspinlock.h @@ -0,0 +1,7 @@ +#ifndef _ASM_SPARC_QSPINLOCK_H +#define _ASM_SPARC_QSPINLOCK_H + +#include +#include + +#endif /* _ASM_SPARC_QSPINLOCK_H */ diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 8901c2d4ada9..f7028f5e1a5a 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -11,89 +11,7 @@ #include #include #include - -/* To get debugging spinlocks which detect and catch - * deadlock situations, set CONFIG_DEBUG_SPINLOCK - * and rebuild your kernel. - */ - -/* Because we play games to save cycles in the non-contention case, we - * need to be extra careful about branch targets into the "spinning" - * code. They live in their own section, but the newer V9 branches - * have a shorter range than the traditional 32-bit sparc branch - * variants. The rule is that the branches that go into and out of - * the spinner sections must be pre-V9 branches. - */ - -#define arch_spin_is_locked(lp) ((lp)->lock != 0) - -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, !VAL); -} - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - unsigned long tmp; - - __asm__ __volatile__( -"1: ldstub [%1], %0\n" -" brnz,pn %0, 2f\n" -" nop\n" -" .subsection 2\n" -"2: ldub [%1], %0\n" -" brnz,pt %0, 2b\n" -" nop\n" -" ba,a,pt %%xcc, 1b\n" -" .previous" - : "=&r" (tmp) - : "r" (lock) - : "memory"); -} - -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned long result; - - __asm__ __volatile__( -" ldstub [%1], %0\n" - : "=r" (result) - : "r" (lock) - : "memory"); - - return (result == 0UL); -} - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - __asm__ __volatile__( -" stb %%g0, [%0]" - : /* No outputs */ - : "r" (lock) - : "memory"); -} - -static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) -{ - unsigned long tmp1, tmp2; - - __asm__ __volatile__( -"1: ldstub [%2], %0\n" -" brnz,pn %0, 2f\n" -" nop\n" -" .subsection 2\n" -"2: rdpr %%pil, %1\n" -" wrpr %3, %%pil\n" -"3: ldub [%2], %0\n" -" brnz,pt %0, 3b\n" -" nop\n" -" ba,pt %%xcc, 1b\n" -" wrpr %1, %%pil\n" -" .previous" - : "=&r" (tmp1), "=&r" (tmp2) - : "r"(lock), "r"(flags) - : "memory"); -} +#include #define arch_read_lock_flags(p, f) arch_read_lock(p) #define arch_write_lock_flags(p, f) arch_write_lock(p) diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h index 64fce21b23df..bce8ef44dfa9 100644 --- a/arch/sparc/include/asm/spinlock_types.h +++ b/arch/sparc/include/asm/spinlock_types.h @@ -1,11 +1,16 @@ #ifndef __SPARC_SPINLOCK_TYPES_H #define __SPARC_SPINLOCK_TYPES_H +#ifdef CONFIG_QUEUED_SPINLOCKS +#include +#else + typedef struct { volatile unsigned char lock; } arch_spinlock_t; #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#endif /* CONFIG_QUEUED_SPINLOCKS */ #ifdef CONFIG_QUEUED_RWLOCKS #include