x86, percpu: Add 'percpu_read_stable()' interface for cacheable accesses

This is very useful for some common things like 'get_current()' and
'get_thread_info()', which can be used multiple times in a function, and
where the result is cacheable.

tj: Added the magical undocumented "P" modifier to UP __percpu_arg()
    to force gcc to dereference the pointer value passed in via the
    "p" input constraint.  Without this, percpu_read_stable() returns
    the address of the percpu variable.  Also added comment explaining
    the difference between percpu_read() and percpu_read_stable().

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
Linus Torvalds 2009-08-03 14:08:48 +09:00 committed by Tejun Heo
parent a33a052f19
commit ed8d9adf35
3 changed files with 21 additions and 9 deletions

View File

@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
 static __always_inline struct task_struct *get_current(void)
 {
-	return percpu_read(current_task);
+	return percpu_read_stable(current_task);
 }
 #define current get_current()

View File

@ -49,7 +49,7 @@
 #define __percpu_arg(x)		"%%"__stringify(__percpu_seg)":%P" #x
 #define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
-#define __percpu_arg(x)		"%" #x
+#define __percpu_arg(x)		"%P" #x
 #endif
 /*
@ -104,36 +104,48 @@ do { \
 	}							\
 } while (0)
-#define percpu_from_op(op, var)				\
+#define percpu_from_op(op, var, constraint)		\
 ({								\
 	typeof(var) ret__;					\
 	switch (sizeof(var)) {					\
 	case 1:							\
 		asm(op "b "__percpu_arg(1)",%0"			\
 		    : "=q" (ret__)				\
-		    : "m" (var));				\
+		    : constraint);				\
 		break;						\
 	case 2:							\
 		asm(op "w "__percpu_arg(1)",%0"			\
 		    : "=r" (ret__)				\
-		    : "m" (var));				\
+		    : constraint);				\
 		break;						\
 	case 4:							\
 		asm(op "l "__percpu_arg(1)",%0"			\
 		    : "=r" (ret__)				\
-		    : "m" (var));				\
+		    : constraint);				\
 		break;						\
 	case 8:							\
 		asm(op "q "__percpu_arg(1)",%0"			\
 		    : "=r" (ret__)				\
-		    : "m" (var));				\
+		    : constraint);				\
 		break;						\
 	default: __bad_percpu_size();				\
 	}							\
 	ret__;							\
 })
-#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
+/*
+ * percpu_read() makes gcc load the percpu variable every time it is
+ * accessed while percpu_read_stable() allows the value to be cached.
+ * percpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across cpus.  The current users include
+ * get_current() and get_thread_info() both of which are actually
+ * per-thread variables implemented as per-cpu variables and thus
+ * stable for the duration of the respective task.
+ */
+#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var,	\
+					       "m" (per_cpu__##var))
+#define percpu_read_stable(var)	percpu_from_op("mov", per_cpu__##var,	\
+					       "p" (&per_cpu__##var))
 #define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
 #define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
 #define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)

View File

@ -213,7 +213,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
-	ti = (void *)(percpu_read(kernel_stack) +
+	ti = (void *)(percpu_read_stable(kernel_stack) +
 		      KERNEL_STACK_OFFSET - THREAD_SIZE);
 	return ti;
 }