Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/asm changes from Ingo Molnar

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Include probe_roms.h in probe_roms.c
  x86/32: Print control and debug registers for kerenel context
  x86: Tighten dependencies of CPU_SUP_*_32
  x86/numa: Improve internode cache alignment
  x86: Fix the NMI nesting comments
  x86-64: Improve insn scheduling in SAVE_ARGS_IRQ
  x86-64: Fix CFI annotations for NMI nesting code
  bitops: Add missing parentheses to new get_order macro
  bitops: Optimise get_order()
  bitops: Adjust the comment on get_order() to describe the size==0 case
  x86/spinlocks: Eliminate TICKET_MASK
  x86-64: Handle byte-wise tail copying in memcpy() without a loop
  x86-64: Fix memcpy() to support sizes of 4Gb and above
  x86-64: Fix memset() to support sizes of 4Gb and above
  x86-64: Slightly shorten copy_page()
This commit is contained in:
Linus Torvalds 2012-03-22 09:13:24 -07:00
commit e17fdf5c67
10 changed files with 128 additions and 98 deletions

View File

@ -303,7 +303,6 @@ config X86_GENERIC
config X86_INTERNODE_CACHE_SHIFT
int
default "12" if X86_VSMP
default "7" if NUMA
default X86_L1_CACHE_SHIFT
config X86_CMPXCHG
@ -441,7 +440,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
depends on !64BIT
depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for Cyrix processors
@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
depends on !64BIT
depends on M386 || M486 || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for UMC processors

View File

@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
return !!(tmp.tail ^ tmp.head);
return tmp.tail != tmp.head;
}
static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
return (__ticket_t)(tmp.tail - tmp.head) > 1;
}
#ifndef CONFIG_PARAVIRT_SPINLOCKS

View File

@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
#endif
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
typedef struct arch_spinlock {
union {

View File

@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
int i;
print_modules();
__show_regs(regs, 0);
__show_regs(regs, !user_mode_vm(regs));
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
TASK_COMM_LEN, current->comm, task_pid_nr(current),

View File

@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
movq %rsp, %rsi
leaq -RBP(%rsp),%rdi /* arg1 for handler */
testl $3, CS(%rdi)
testl $3, CS-RBP(%rsi)
je 1f
SWAPGS
/*
@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
* moving irq_enter into assembly, which would be too much work)
*/
1: incl PER_CPU_VAR(irq_count)
jne 2f
mov PER_CPU_VAR(irq_stack_ptr),%rsp
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
CFI_DEF_CFA_REGISTER rsi
2: /* Store previous stack value */
/* Store previous stack value */
pushq %rsi
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
0x77 /* DW_OP_breg7 */, 0, \
@ -1530,6 +1529,7 @@ ENTRY(nmi)
/* Use %rdx as out temp variable throughout */
pushq_cfi %rdx
CFI_REL_OFFSET rdx, 0
/*
* If %cs was not the kernel segment, then the NMI triggered in user
@ -1554,6 +1554,7 @@ ENTRY(nmi)
*/
lea 6*8(%rsp), %rdx
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
CFI_REMEMBER_STATE
nested_nmi:
/*
@ -1585,10 +1586,12 @@ nested_nmi:
nested_nmi_out:
popq_cfi %rdx
CFI_RESTORE rdx
/* No need to check faults here */
INTERRUPT_RETURN
CFI_RESTORE_STATE
first_nmi:
/*
* Because nested NMIs will use the pushed location that we
@ -1620,10 +1623,15 @@ first_nmi:
* | pt_regs |
* +-------------------------+
*
* The saved RIP is used to fix up the copied RIP that a nested
* NMI may zero out. The original stack frame and the temp storage
* The saved stack frame is used to fix up the copied stack frame
* that a nested NMI may change to make the interrupted NMI iret jump
* to the repeat_nmi. The original stack frame and the temp storage
* is also used by nested NMIs and can not be trusted on exit.
*/
/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
movq (%rsp), %rdx
CFI_RESTORE rdx
/* Set the NMI executing variable on the stack. */
pushq_cfi $1
@ -1631,22 +1639,39 @@ first_nmi:
.rept 5
pushq_cfi 6*8(%rsp)
.endr
CFI_DEF_CFA_OFFSET SS+8-RIP
/* Everything up to here is safe from nested NMIs */
/*
* If there was a nested NMI, the first NMI's iret will return
* here. But NMIs are still enabled and we can take another
* nested NMI. The nested NMI checks the interrupted RIP to see
* if it is between repeat_nmi and end_repeat_nmi, and if so
* it will just return, as we are about to repeat an NMI anyway.
* This makes it safe to copy to the stack frame that a nested
* NMI will update.
*/
repeat_nmi:
/*
* Update the stack variable to say we are still in NMI (the update
* is benign for the non-repeat case, where 1 was pushed just above
* to this very stack slot).
*/
movq $1, 5*8(%rsp)
/* Make another copy, this one may be modified by nested NMIs */
.rept 5
pushq_cfi 4*8(%rsp)
.endr
/* Do not pop rdx, nested NMIs will corrupt it */
movq 11*8(%rsp), %rdx
CFI_DEF_CFA_OFFSET SS+8-RIP
end_repeat_nmi:
/*
* Everything below this point can be preempted by a nested
* NMI if the first NMI took an exception. Repeated NMIs
* caused by an exception and nested NMI will start here, and
* can still be preempted by another NMI.
* NMI if the first NMI took an exception and reset our iret stack
* so that we repeat another NMI.
*/
restart_nmi:
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@ -1675,26 +1700,6 @@ nmi_restore:
CFI_ENDPROC
END(nmi)
/*
* If an NMI hit an iret because of an exception or breakpoint,
* it can lose its NMI context, and a nested NMI may come in.
* In that case, the nested NMI will change the preempted NMI's
* stack to jump to here when it does the final iret.
*/
repeat_nmi:
INTR_FRAME
/* Update the stack variable to say we are still in NMI */
movq $1, 5*8(%rsp)
/* copy the saved stack back to copy stack */
.rept 5
pushq_cfi 4*8(%rsp)
.endr
jmp restart_nmi
CFI_ENDPROC
end_repeat_nmi:
ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax

View File

@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/export.h>
#include <asm/probe_roms.h>
#include <asm/pci-direct.h>
#include <asm/e820.h>
#include <asm/mmzone.h>

View File

@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
ENTRY(copy_page)
CFI_STARTPROC
subq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET 3*8
subq $2*8,%rsp
CFI_ADJUST_CFA_OFFSET 2*8
movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13, 2*8
movl $(4096/64)-5,%ecx
.p2align 4
@ -91,10 +89,8 @@ ENTRY(copy_page)
CFI_RESTORE rbx
movq 1*8(%rsp),%r12
CFI_RESTORE r12
movq 2*8(%rsp),%r13
CFI_RESTORE r13
addq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET -3*8
addq $2*8,%rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
CFI_ENDPROC

View File

@ -27,9 +27,8 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
movl %edx, %ecx
shrl $3, %ecx
movq %rdx, %rcx
shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
@ -48,8 +47,7 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
movq %rdi, %rax
movl %edx, %ecx
movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
@ -60,10 +58,7 @@ ENTRY(memcpy)
CFI_STARTPROC
movq %rdi, %rax
/*
* Use 32bit CMP here to avoid long NOP padding.
*/
cmp $0x20, %edx
cmpq $0x20, %rdx
jb .Lhandle_tail
/*
@ -72,7 +67,7 @@ ENTRY(memcpy)
*/
cmp %dil, %sil
jl .Lcopy_backward
subl $0x20, %edx
subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx
@ -91,7 +86,7 @@ ENTRY(memcpy)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
addq $0x20, %rdx
addl $0x20, %edx
jmp .Lhandle_tail
.Lcopy_backward:
@ -123,11 +118,11 @@ ENTRY(memcpy)
/*
* Calculate copy position to head.
*/
addq $0x20, %rdx
addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
cmpq $16, %rdx
cmpl $16, %edx
jb .Lless_16bytes
/*
@ -144,7 +139,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_16bytes:
cmpq $8, %rdx
cmpl $8, %edx
jb .Lless_8bytes
/*
* Move data from 8 bytes to 15 bytes.
@ -156,7 +151,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_8bytes:
cmpq $4, %rdx
cmpl $4, %edx
jb .Lless_3bytes
/*
@ -169,18 +164,19 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_3bytes:
cmpl $0, %edx
je .Lend
subl $1, %edx
jb .Lend
/*
* Move data from 1 bytes to 3 bytes.
*/
.Lloop_1:
movb (%rsi), %r8b
movb %r8b, (%rdi)
incq %rdi
incq %rsi
decl %edx
jnz .Lloop_1
movzbl (%rsi), %ecx
jz .Lstore_1byte
movzbq 1(%rsi), %r8
movzbq (%rsi, %rdx), %r9
movb %r8b, 1(%rdi)
movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
movb %cl, (%rdi)
.Lend:
retq

View File

@ -19,16 +19,15 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
movq %rdi,%r9
movl %edx,%r8d
andl $7,%r8d
movl %edx,%ecx
shrl $3,%ecx
movq %rdx,%rcx
andl $7,%edx
shrq $3,%rcx
/* expand byte value */
movzbl %sil,%esi
movabs $0x0101010101010101,%rax
mulq %rsi /* with rax, clobbers rdx */
imulq %rsi,%rax
rep stosq
movl %r8d,%ecx
movl %edx,%ecx
rep stosb
movq %r9,%rax
ret
@ -50,7 +49,7 @@
.Lmemset_c_e:
movq %rdi,%r9
movb %sil,%al
movl %edx,%ecx
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
@ -61,12 +60,11 @@ ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC
movq %rdi,%r10
movq %rdx,%r11
/* expand byte value */
movzbl %sil,%ecx
movabs $0x0101010101010101,%rax
mul %rcx /* with rax, clobbers rdx */
imulq %rcx,%rax
/* align dst */
movl %edi,%r9d
@ -75,13 +73,13 @@ ENTRY(__memset)
CFI_REMEMBER_STATE
.Lafter_bad_alignment:
movl %r11d,%ecx
shrl $6,%ecx
movq %rdx,%rcx
shrq $6,%rcx
jz .Lhandle_tail
.p2align 4
.Lloop_64:
decl %ecx
decq %rcx
movq %rax,(%rdi)
movq %rax,8(%rdi)
movq %rax,16(%rdi)
@ -97,7 +95,7 @@ ENTRY(__memset)
to predict jump tables. */
.p2align 4
.Lhandle_tail:
movl %r11d,%ecx
movl %edx,%ecx
andl $63&(~7),%ecx
jz .Lhandle_7
shrl $3,%ecx
@ -109,12 +107,11 @@ ENTRY(__memset)
jnz .Lloop_8
.Lhandle_7:
movl %r11d,%ecx
andl $7,%ecx
andl $7,%edx
jz .Lende
.p2align 4
.Lloop_1:
decl %ecx
decl %edx
movb %al,(%rdi)
leaq 1(%rdi),%rdi
jnz .Lloop_1
@ -125,13 +122,13 @@ ENTRY(__memset)
CFI_RESTORE_STATE
.Lbad_alignment:
cmpq $7,%r11
cmpq $7,%rdx
jbe .Lhandle_7
movq %rax,(%rdi) /* unaligned store */
movq $8,%r8
subq %r9,%r8
addq %r8,%rdi
subq %r8,%r11
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC

View File

@ -4,21 +4,58 @@
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
#include <linux/log2.h>
/* Pure 2^n version of get_order */
static inline __attribute_const__ int get_order(unsigned long size)
/*
* Runtime evaluation of get_order()
*/
static inline __attribute_const__
int __get_order(unsigned long size)
{
int order;
size = (size - 1) >> (PAGE_SHIFT - 1);
order = -1;
do {
size >>= 1;
order++;
} while (size);
size--;
size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
order = fls(size);
#else
order = fls64(size);
#endif
return order;
}
/**
* get_order - Determine the allocation order of a memory size
* @size: The size for which to get the order
*
* Determine the allocation order of a particular sized block of memory. This
* is on a logarithmic scale, where:
*
* 0 -> 2^0 * PAGE_SIZE and below
* 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
* 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
* 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
* 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
* ...
*
* The order returned is used to find the smallest allocation granule required
* to hold an object of the specified size.
*
* The result is undefined if the size is 0.
*
* This function may be used to initialise variables with compile time
* evaluations of constants.
*/
#define get_order(n) \
( \
__builtin_constant_p(n) ? ( \
((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
(((n) < (1UL << PAGE_SHIFT)) ? 0 : \
ilog2((n) - 1) - PAGE_SHIFT + 1) \
) : \
__get_order(n) \
)
#endif /* __ASSEMBLY__ */
#endif /* __ASM_GENERIC_GETORDER_H */