mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 04:31:50 +00:00
d873a364ef
The ABI mandates that the %esp register must be a multiple of 16 when executing a 'call' instruction. Commit 2ab446336b
("tools/nolibc: i386: shrink _start with _start_c") simplified the _start function, but it didn't take care of the %esp alignment, causing SIGSEGV on SSE and AVX programs that use aligned move instructions (e.g., movdqa, movaps, and vmovdqa). The 'and $-16, %esp' aligns the %esp at a multiple of 16. Then 'push %eax' will subtract the %esp by 4; thus, it breaks the 16-byte alignment. Make sure the %esp is correctly aligned after the push by subtracting 12 before the push. Extra: Add 'add $12, %esp' before the 'and $-16, %esp' to avoid over-estimating for particular cases as suggested by Willy. A test program to validate the %esp alignment on _start can be found at: https://lore.kernel.org/lkml/ZOoindMFj1UKqo+s@biznet-home.integral.gnuweeb.org [ Thomas: trim Fixes tag commit id ] Cc: Zhangjin Wu <falcon@tinylab.org> Fixes: 2ab446336b
("tools/nolibc: i386: shrink _start with _start_c") Reported-by: Nicholas Rosenberg <inori@vnlx.org> Acked-by: Thomas Weißschuh <linux@weissschuh.net> Signed-off-by: Ammar Faizi <ammarfaizi2@gnuweeb.org> Reviewed-by: Alviro Iskandar Setiawan <alviro.iskandar@gnuweeb.org> Signed-off-by: Willy Tarreau <w@1wt.eu> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
181 lines
8.8 KiB
C
181 lines
8.8 KiB
C
/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
|
|
/*
|
|
* i386 specific definitions for NOLIBC
|
|
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
|
|
*/
|
|
|
|
#ifndef _NOLIBC_ARCH_I386_H
|
|
#define _NOLIBC_ARCH_I386_H
|
|
|
|
#include "compiler.h"
|
|
#include "crt.h"
|
|
|
|
/* Syscalls for i386 :
|
|
* - mostly similar to x86_64
|
|
* - registers are 32-bit
|
|
* - syscall number is passed in eax
|
|
* - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
|
|
* - all registers are preserved (except eax of course)
|
|
* - the system call is performed by calling int $0x80
|
|
* - syscall return comes in eax
|
|
* - the arguments are cast to long and assigned into the target registers
|
|
* which are then simply passed as registers to the asm code, so that we
|
|
* don't have to experience issues with register constraints.
|
|
* - the syscall number is always specified last in order to allow to force
|
|
* some registers before (gcc refuses a %-register at the last position).
|
|
*
|
|
* Also, i386 supports the old_select syscall if newselect is not available
|
|
*/
|
|
#define __ARCH_WANT_SYS_OLD_SELECT
|
|
|
|
#define my_syscall0(num) \
|
|
({ \
|
|
long _ret; \
|
|
register long _num __asm__ ("eax") = (num); \
|
|
\
|
|
__asm__ volatile ( \
|
|
"int $0x80\n" \
|
|
: "=a" (_ret) \
|
|
: "0"(_num) \
|
|
: "memory", "cc" \
|
|
); \
|
|
_ret; \
|
|
})
|
|
|
|
#define my_syscall1(num, arg1) \
|
|
({ \
|
|
long _ret; \
|
|
register long _num __asm__ ("eax") = (num); \
|
|
register long _arg1 __asm__ ("ebx") = (long)(arg1); \
|
|
\
|
|
__asm__ volatile ( \
|
|
"int $0x80\n" \
|
|
: "=a" (_ret) \
|
|
: "r"(_arg1), \
|
|
"0"(_num) \
|
|
: "memory", "cc" \
|
|
); \
|
|
_ret; \
|
|
})
|
|
|
|
#define my_syscall2(num, arg1, arg2) \
|
|
({ \
|
|
long _ret; \
|
|
register long _num __asm__ ("eax") = (num); \
|
|
register long _arg1 __asm__ ("ebx") = (long)(arg1); \
|
|
register long _arg2 __asm__ ("ecx") = (long)(arg2); \
|
|
\
|
|
__asm__ volatile ( \
|
|
"int $0x80\n" \
|
|
: "=a" (_ret) \
|
|
: "r"(_arg1), "r"(_arg2), \
|
|
"0"(_num) \
|
|
: "memory", "cc" \
|
|
); \
|
|
_ret; \
|
|
})
|
|
|
|
#define my_syscall3(num, arg1, arg2, arg3) \
|
|
({ \
|
|
long _ret; \
|
|
register long _num __asm__ ("eax") = (num); \
|
|
register long _arg1 __asm__ ("ebx") = (long)(arg1); \
|
|
register long _arg2 __asm__ ("ecx") = (long)(arg2); \
|
|
register long _arg3 __asm__ ("edx") = (long)(arg3); \
|
|
\
|
|
__asm__ volatile ( \
|
|
"int $0x80\n" \
|
|
: "=a" (_ret) \
|
|
: "r"(_arg1), "r"(_arg2), "r"(_arg3), \
|
|
"0"(_num) \
|
|
: "memory", "cc" \
|
|
); \
|
|
_ret; \
|
|
})
|
|
|
|
#define my_syscall4(num, arg1, arg2, arg3, arg4) \
|
|
({ \
|
|
long _ret; \
|
|
register long _num __asm__ ("eax") = (num); \
|
|
register long _arg1 __asm__ ("ebx") = (long)(arg1); \
|
|
register long _arg2 __asm__ ("ecx") = (long)(arg2); \
|
|
register long _arg3 __asm__ ("edx") = (long)(arg3); \
|
|
register long _arg4 __asm__ ("esi") = (long)(arg4); \
|
|
\
|
|
__asm__ volatile ( \
|
|
"int $0x80\n" \
|
|
: "=a" (_ret) \
|
|
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
|
|
"0"(_num) \
|
|
: "memory", "cc" \
|
|
); \
|
|
_ret; \
|
|
})
|
|
|
|
#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
|
|
({ \
|
|
long _ret; \
|
|
register long _num __asm__ ("eax") = (num); \
|
|
register long _arg1 __asm__ ("ebx") = (long)(arg1); \
|
|
register long _arg2 __asm__ ("ecx") = (long)(arg2); \
|
|
register long _arg3 __asm__ ("edx") = (long)(arg3); \
|
|
register long _arg4 __asm__ ("esi") = (long)(arg4); \
|
|
register long _arg5 __asm__ ("edi") = (long)(arg5); \
|
|
\
|
|
__asm__ volatile ( \
|
|
"int $0x80\n" \
|
|
: "=a" (_ret) \
|
|
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
|
|
"0"(_num) \
|
|
: "memory", "cc" \
|
|
); \
|
|
_ret; \
|
|
})
|
|
|
|
#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
|
|
({ \
|
|
long _eax = (long)(num); \
|
|
long _arg6 = (long)(arg6); /* Always in memory */ \
|
|
__asm__ volatile ( \
|
|
"pushl %[_arg6]\n\t" \
|
|
"pushl %%ebp\n\t" \
|
|
"movl 4(%%esp),%%ebp\n\t" \
|
|
"int $0x80\n\t" \
|
|
"popl %%ebp\n\t" \
|
|
"addl $4,%%esp\n\t" \
|
|
: "+a"(_eax) /* %eax */ \
|
|
: "b"(arg1), /* %ebx */ \
|
|
"c"(arg2), /* %ecx */ \
|
|
"d"(arg3), /* %edx */ \
|
|
"S"(arg4), /* %esi */ \
|
|
"D"(arg5), /* %edi */ \
|
|
[_arg6]"m"(_arg6) /* memory */ \
|
|
: "memory", "cc" \
|
|
); \
|
|
_eax; \
|
|
})
|
|
|
|
/* startup code */
|
|
/*
|
|
* i386 System V ABI mandates:
|
|
* 1) last pushed argument must be 16-byte aligned.
|
|
* 2) The deepest stack frame should be set to zero
|
|
*
|
|
*/
|
|
void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
|
|
{
|
|
__asm__ volatile (
|
|
"xor %ebp, %ebp\n" /* zero the stack frame */
|
|
"mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */
|
|
"add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */
|
|
"and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */
|
|
"sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */
|
|
"push %eax\n" /* push arg1 on stack to support plain stack modes too */
|
|
"call _start_c\n" /* transfer to c runtime */
|
|
"hlt\n" /* ensure it does not return */
|
|
);
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
#endif /* _NOLIBC_ARCH_I386_H */
|