forked from Minki/linux
xtensa: fix fast_syscall_spill_registers
The original implementation could clobber registers under certain conditions. The Xtensa processor architecture uses windowed registers and the original implementation was using a4 as a temporary register, which under certain conditions could be register a0 of the oldest window frame, and didn't always restore the content correctly. By moving the _spill_registers routine inside the fast system call, it frees up one more register (the return address is not required anymore) for the spill routine. Signed-off-by: Chris Zankel <chris@zankel.net>
This commit is contained in:
parent
3251f1e27a
commit
6b5a1f74e5
@ -1081,34 +1081,202 @@ ENTRY(fast_syscall_spill_registers)
|
||||
|
||||
rsr a0, sar
|
||||
s32i a3, a2, PT_AREG3
|
||||
s32i a0, a2, PT_SAR
|
||||
|
||||
/* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */
|
||||
|
||||
s32i a4, a2, PT_AREG4
|
||||
s32i a0, a2, PT_AREG5 # store SAR to PT_AREG5
|
||||
|
||||
/* The spill routine might clobber a7, a11, and a15. */
|
||||
|
||||
s32i a7, a2, PT_AREG7
|
||||
s32i a8, a2, PT_AREG8
|
||||
s32i a11, a2, PT_AREG11
|
||||
s32i a12, a2, PT_AREG12
|
||||
s32i a15, a2, PT_AREG15
|
||||
|
||||
call0 _spill_registers # destroys a3, a4, and SAR
|
||||
/*
|
||||
* Rotate ws so that the current windowbase is at bit 0.
|
||||
* Assume ws = xxxwww1yy (www1 current window frame).
|
||||
* Rotate ws right so that a3 = yyxxxwww1.
|
||||
*/
|
||||
|
||||
rsr a0, windowbase
|
||||
rsr a3, windowstart # a3 = xxxwww1yy
|
||||
ssr a0 # holds WB
|
||||
slli a0, a3, WSBITS
|
||||
or a3, a3, a0 # a3 = xxxwww1yyxxxwww1yy
|
||||
srl a3, a3 # a3 = 00xxxwww1yyxxxwww1
|
||||
|
||||
/* We are done if no frame other than the current register frame is live. */
|
||||
|
||||
extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww
|
||||
movi a0, (1 << (WSBITS-1))
|
||||
_beqz a3, .Lnospill # only one active frame? jump
|
||||
|
||||
/* We want 1 at the top, so that we return to the current windowbase */
|
||||
|
||||
or a3, a3, a0 # 1yyxxxwww
|
||||
|
||||
/* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
|
||||
|
||||
wsr a3, windowstart # save shifted windowstart
|
||||
neg a0, a3
|
||||
and a3, a0, a3 # first bit set from right: 000010000
|
||||
|
||||
ffs_ws a0, a3 # a0: shifts to skip empty frames
|
||||
movi a3, WSBITS
|
||||
sub a0, a3, a0 # WSBITS-a0:number of 0-bits from right
|
||||
ssr a0 # save in SAR for later.
|
||||
|
||||
rsr a3, windowbase
|
||||
add a3, a3, a0
|
||||
wsr a3, windowbase
|
||||
rsync
|
||||
|
||||
rsr a3, windowstart
|
||||
srl a3, a3 # shift windowstart
|
||||
|
||||
/* WB is now just one frame below the oldest frame in the register
|
||||
window. WS is shifted so the oldest frame is in bit 0, thus, WB
|
||||
and WS differ by one 4-register frame. */
|
||||
|
||||
/* Save frames. Depending on what call was used (call4, call8, call12),
|
||||
* we have to save 4, 8, or 12 registers.
|
||||
*/
|
||||
|
||||
|
||||
.Lloop: _bbsi.l a3, 1, .Lc4
|
||||
_bbci.l a3, 2, .Lc12
|
||||
|
||||
.Lc8: s32e a4, a13, -16
|
||||
l32e a4, a5, -12
|
||||
s32e a8, a4, -32
|
||||
s32e a5, a13, -12
|
||||
s32e a6, a13, -8
|
||||
s32e a7, a13, -4
|
||||
s32e a9, a4, -28
|
||||
s32e a10, a4, -24
|
||||
s32e a11, a4, -20
|
||||
srli a11, a3, 2 # shift windowbase by 2
|
||||
rotw 2
|
||||
_bnei a3, 1, .Lloop
|
||||
j .Lexit
|
||||
|
||||
.Lc4: s32e a4, a9, -16
|
||||
s32e a5, a9, -12
|
||||
s32e a6, a9, -8
|
||||
s32e a7, a9, -4
|
||||
|
||||
srli a7, a3, 1
|
||||
rotw 1
|
||||
_bnei a3, 1, .Lloop
|
||||
j .Lexit
|
||||
|
||||
.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 3 shouldn't be zero!
|
||||
|
||||
/* 12-register frame (call12) */
|
||||
|
||||
l32e a0, a5, -12
|
||||
s32e a8, a0, -48
|
||||
mov a8, a0
|
||||
|
||||
s32e a9, a8, -44
|
||||
s32e a10, a8, -40
|
||||
s32e a11, a8, -36
|
||||
s32e a12, a8, -32
|
||||
s32e a13, a8, -28
|
||||
s32e a14, a8, -24
|
||||
s32e a15, a8, -20
|
||||
srli a15, a3, 3
|
||||
|
||||
/* The stack pointer for a4..a7 is out of reach, so we rotate the
|
||||
* window, grab the stackpointer, and rotate back.
|
||||
* Alternatively, we could also use the following approach, but that
|
||||
* makes the fixup routine much more complicated:
|
||||
* rotw 1
|
||||
* s32e a0, a13, -16
|
||||
* ...
|
||||
* rotw 2
|
||||
*/
|
||||
|
||||
rotw 1
|
||||
mov a4, a13
|
||||
rotw -1
|
||||
|
||||
s32e a4, a8, -16
|
||||
s32e a5, a8, -12
|
||||
s32e a6, a8, -8
|
||||
s32e a7, a8, -4
|
||||
|
||||
rotw 3
|
||||
|
||||
_beqi a3, 1, .Lexit
|
||||
j .Lloop
|
||||
|
||||
.Lexit:
|
||||
|
||||
/* Done. Do the final rotation and set WS */
|
||||
|
||||
rotw 1
|
||||
rsr a3, windowbase
|
||||
ssl a3
|
||||
movi a3, 1
|
||||
sll a3, a3
|
||||
wsr a3, windowstart
|
||||
.Lnospill:
|
||||
|
||||
/* Advance PC, restore registers and SAR, and return from exception. */
|
||||
|
||||
l32i a3, a2, PT_AREG5
|
||||
l32i a4, a2, PT_AREG4
|
||||
l32i a3, a2, PT_SAR
|
||||
l32i a0, a2, PT_AREG0
|
||||
wsr a3, sar
|
||||
l32i a3, a2, PT_AREG3
|
||||
|
||||
/* Restore clobbered registers. */
|
||||
|
||||
l32i a4, a2, PT_AREG4
|
||||
l32i a7, a2, PT_AREG7
|
||||
l32i a8, a2, PT_AREG8
|
||||
l32i a11, a2, PT_AREG11
|
||||
l32i a12, a2, PT_AREG12
|
||||
l32i a15, a2, PT_AREG15
|
||||
|
||||
movi a2, 0
|
||||
rfe
|
||||
|
||||
.Linvalid_mask:
|
||||
|
||||
/* We get here because of an unrecoverable error in the window
|
||||
* registers, so set up a dummy frame and kill the user application.
|
||||
* Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
|
||||
*/
|
||||
|
||||
movi a0, 1
|
||||
movi a1, 0
|
||||
|
||||
wsr a0, windowstart
|
||||
wsr a1, windowbase
|
||||
rsync
|
||||
|
||||
movi a0, 0
|
||||
|
||||
rsr a3, excsave1
|
||||
l32i a1, a3, EXC_TABLE_KSTK
|
||||
|
||||
movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL
|
||||
wsr a4, ps
|
||||
rsync
|
||||
|
||||
movi a6, SIGSEGV
|
||||
movi a4, do_exit
|
||||
callx4 a4
|
||||
|
||||
/* shouldn't return, so panic */
|
||||
|
||||
wsr a0, excsave1
|
||||
movi a0, unrecoverable_exception
|
||||
callx0 a0 # should not return
|
||||
1: j 1b
|
||||
|
||||
|
||||
ENDPROC(fast_syscall_spill_registers)
|
||||
|
||||
/* Fixup handler.
|
||||
@ -1232,209 +1400,6 @@ ENTRY(fast_syscall_spill_registers_fixup_return)
|
||||
|
||||
ENDPROC(fast_syscall_spill_registers_fixup_return)
|
||||
|
||||
/*
|
||||
* spill all registers.
|
||||
*
|
||||
* This is not a real function. The following conditions must be met:
|
||||
*
|
||||
* - must be called with call0.
|
||||
* - uses a3, a4 and SAR.
|
||||
* - the last 'valid' register of each frame is clobbered.
|
||||
* - the caller must have registered a fixup handler
|
||||
* (or be inside a critical section)
|
||||
* - PS_EXCM must be set (PS_WOE cleared?)
|
||||
*/
|
||||
|
||||
ENTRY(_spill_registers)
/* Saves every live window frame except the current one with s32e, then sets WINDOWSTART to 1 << WINDOWBASE (see .Lexit) and returns with ret. */
/* An inconsistent WINDOWSTART mask ends up in .Linvalid_mask instead of returning. */
|
||||
|
||||
/*
|
||||
* Rotate ws so that the current windowbase is at bit 0.
|
||||
* Assume ws = xxxwww1yy (www1 current window frame).
|
||||
* Rotate ws right so that a3 = yyxxxwww1 (a4 is only a temporary).
|
||||
*/
|
||||
|
||||
rsr a4, windowbase
|
||||
rsr a3, windowstart # a3 = xxxwww1yy
|
||||
ssr a4 # SAR holds WB (shift amount for the srl below)
|
||||
slli a4, a3, WSBITS # a4 = ws << WSBITS (temporary)
|
||||
or a3, a3, a4 # a3 = xxxwww1yyxxxwww1yy
|
||||
srl a3, a3 # a3 = 00xxxwww1yyxxxwww1
|
||||
|
||||
/* We are done if no frame other than the current register frame is live. */
|
||||
|
||||
extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww
|
||||
movi a4, (1 << (WSBITS-1)) # top-bit marker, OR-ed into a3 below
|
||||
_beqz a3, .Lnospill # only one active frame? jump
|
||||
|
||||
/* We want 1 at the top, so that we return to the current windowbase */
|
||||
|
||||
or a3, a3, a4 # 1yyxxxwww
|
||||
|
||||
/* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
|
||||
|
||||
wsr a3, windowstart # save shifted windowstart
|
||||
neg a4, a3 # a3 & -a3 isolates the lowest set bit
|
||||
and a3, a4, a3 # first bit set from right: 000010000
|
||||
|
||||
ffs_ws a4, a3 # a4: shifts to skip empty frames
|
||||
movi a3, WSBITS
|
||||
sub a4, a3, a4 # WSBITS-a4:number of 0-bits from right
|
||||
ssr a4 # save in SAR for later.
|
||||
|
||||
rsr a3, windowbase
|
||||
add a3, a3, a4 # advance WB past the empty frames
|
||||
wsr a3, windowbase
|
||||
rsync
|
||||
|
||||
rsr a3, windowstart
|
||||
srl a3, a3 # shift windowstart
|
||||
|
||||
/* WB is now just one frame below the oldest frame in the register
|
||||
window. WS is shifted so the oldest frame is in bit 0, thus, WB
|
||||
and WS differ by one 4-register frame. */
|
||||
|
||||
/* Save frames. Depending on what call was used (call4, call8, call12),
|
||||
* we have to save 4, 8, or 12 registers.
|
||||
*/
|
||||
|
||||
_bbsi.l a3, 1, .Lc4 # bit 1 set: handle as 4-register frame
|
||||
_bbsi.l a3, 2, .Lc8 # bit 2 set: handle as 8-register frame
|
||||
|
||||
/* Special case: we have a call12-frame starting at a4. */
|
||||
|
||||
_bbci.l a3, 3, .Lc12 # bit 3 shouldn't be zero! (.Lc12 re-tests bit 3 and branches to .Linvalid_mask)
|
||||
|
||||
s32e a4, a1, -16 # a1 is valid with an empty spill area
|
||||
l32e a4, a5, -12
|
||||
s32e a8, a4, -48
|
||||
mov a8, a4
|
||||
l32e a4, a1, -16 # reload the a4 value stashed at a1-16 above
|
||||
j .Lc12c
|
||||
|
||||
.Lnospill:
|
||||
ret # nothing to spill
|
||||
|
||||
.Lloop: _bbsi.l a3, 1, .Lc4
|
||||
_bbci.l a3, 2, .Lc12
|
||||
|
||||
.Lc8: s32e a4, a13, -16
|
||||
l32e a4, a5, -12
|
||||
s32e a8, a4, -32
|
||||
s32e a5, a13, -12
|
||||
s32e a6, a13, -8
|
||||
s32e a7, a13, -4
|
||||
s32e a9, a4, -28
|
||||
s32e a10, a4, -24
|
||||
s32e a11, a4, -20
|
||||
|
||||
srli a11, a3, 2 # shift windowstart mask by 2; a11 becomes a3 after rotw 2
|
||||
rotw 2
|
||||
_bnei a3, 1, .Lloop # mask == 1: only the current frame is left, fall through
|
||||
|
||||
.Lexit: /* Done. Do the final rotation, set WS, and return. */
|
||||
|
||||
rotw 1
|
||||
rsr a3, windowbase
|
||||
ssl a3 # SAR set up for a left shift by WB
|
||||
movi a3, 1
|
||||
sll a3, a3 # a3 = 1 << WB
|
||||
wsr a3, windowstart
|
||||
ret
|
||||
|
||||
.Lc4: s32e a4, a9, -16
|
||||
s32e a5, a9, -12
|
||||
s32e a6, a9, -8
|
||||
s32e a7, a9, -4
|
||||
|
||||
srli a7, a3, 1 # shifted mask; a7 becomes a3 after rotw 1
|
||||
rotw 1
|
||||
_bnei a3, 1, .Lloop
|
||||
j .Lexit
|
||||
|
||||
.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 3 shouldn't be zero!
|
||||
|
||||
/* 12-register frame (call12) */
|
||||
|
||||
l32e a2, a5, -12
|
||||
s32e a8, a2, -48
|
||||
mov a8, a2
|
||||
|
||||
.Lc12c: s32e a9, a8, -44
|
||||
s32e a10, a8, -40
|
||||
s32e a11, a8, -36
|
||||
s32e a12, a8, -32
|
||||
s32e a13, a8, -28
|
||||
s32e a14, a8, -24
|
||||
s32e a15, a8, -20
|
||||
srli a15, a3, 3 # shifted mask; a15 becomes a3 after rotw 3
|
||||
|
||||
/* The stack pointer for a4..a7 is out of reach, so we rotate the
|
||||
* window, grab the stackpointer, and rotate back.
|
||||
* Alternatively, we could also use the following approach, but that
|
||||
* makes the fixup routine much more complicated:
|
||||
* rotw 1
|
||||
* s32e a0, a13, -16
|
||||
* ...
|
||||
* rotw 2
|
||||
*/
|
||||
|
||||
rotw 1
|
||||
mov a5, a13 # a5 here is a9 of the un-rotated window (base register used below)
|
||||
rotw -1
|
||||
|
||||
s32e a4, a9, -16
|
||||
s32e a5, a9, -12
|
||||
s32e a6, a9, -8
|
||||
s32e a7, a9, -4
|
||||
|
||||
rotw 3
|
||||
|
||||
_beqi a3, 1, .Lexit
|
||||
j .Lloop
|
||||
|
||||
.Linvalid_mask:
|
||||
|
||||
/* We get here because of an unrecoverable error in the window
|
||||
* registers. If we are in user space, we kill the application,
|
||||
* however, this condition is unrecoverable in kernel space.
|
||||
*/
|
||||
|
||||
rsr a0, ps
|
||||
_bbci.l a0, PS_UM_BIT, 1f # PS_UM_BIT clear: take the kernel-space panic path
|
||||
|
||||
/* User space: Setup a dummy frame and kill application.
|
||||
* Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
|
||||
*/
|
||||
|
||||
movi a0, 1
|
||||
movi a1, 0
|
||||
|
||||
wsr a0, windowstart
|
||||
wsr a1, windowbase
|
||||
rsync
|
||||
|
||||
movi a0, 0
|
||||
|
||||
rsr a3, excsave1
|
||||
l32i a1, a3, EXC_TABLE_KSTK
|
||||
|
||||
movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL
|
||||
wsr a4, ps
|
||||
rsync
|
||||
|
||||
movi a6, SIGSEGV # argument for do_exit (a6 is the callee's a2 after callx4)
|
||||
movi a4, do_exit
|
||||
callx4 a4
|
||||
|
||||
1: /* Kernel space: PANIC! */
|
||||
|
||||
wsr a0, excsave1
|
||||
movi a0, unrecoverable_exception
|
||||
callx0 a0 # should not return
|
||||
1: j 1b
|
||||
|
||||
ENDPROC(_spill_registers)
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
/*
|
||||
* We should never get here. Bail out!
|
||||
|
Loading…
Reference in New Issue
Block a user