linux/arch/x86/include/asm/syscall.h

171 lines
4.1 KiB
C
Raw Normal View History

/*
* Access to user system call parameters and results
*
* Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* See asm-generic/syscall.h for descriptions of what we must do here.
*/
#ifndef _ASM_X86_SYSCALL_H
#define _ASM_X86_SYSCALL_H
#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <asm/asm-offsets.h> /* For NR_syscalls */
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
#ifdef CONFIG_X86_64
syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems: Each syscall defines a stub which takes struct pt_regs as its only argument. It decodes just those parameters it needs, e.g: asmlinkage long sys_xyzzy(const struct pt_regs *regs) { return SyS_xyzzy(regs->di, regs->si, regs->dx); } This approach avoids leaking random user-provided register content down the call chain. For example, for sys_recv() which is a 4-parameter syscall, the assembly now is (in slightly reordered fashion): <sys_recv>: callq <__fentry__> /* decode regs->di, ->si, ->dx and ->r10 */ mov 0x70(%rdi),%rdi mov 0x68(%rdi),%rsi mov 0x60(%rdi),%rdx mov 0x38(%rdi),%rcx [ SyS_recv() is automatically inlined by the compiler, as it is not [yet] used anywhere else ] /* clear %r9 and %r8, the 5th and 6th args */ xor %r9d,%r9d xor %r8d,%r8d /* do the actual work */ callq __sys_recvfrom /* cleanup and return */ cltq retq The only valid place in an x86-64 kernel which rightfully calls a syscall function on its own -- vsyscall -- needs to be modified to pass struct pt_regs onwards as well. To keep the syscall table generation working independent of SYSCALL_PTREGS being enabled, the stubs are named the same as the "original" syscall stubs, i.e. sys_*(). This patch is based on an original proof-of-concept | From: Linus Torvalds <torvalds@linux-foundation.org> | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> and was split up and heavily modified by me, in particular to base it on ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being, and to update the vsyscall to the new calling convention. Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-04-05 09:53:02 +00:00
typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
#else
typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
#endif /* CONFIG_X86_64 */
extern const sys_call_ptr_t sys_call_table[];
#if defined(CONFIG_X86_32)
#define ia32_sys_call_table sys_call_table
#define __NR_syscall_compat_max __NR_syscall_max
#define IA32_NR_syscalls NR_syscalls
#endif
#if defined(CONFIG_IA32_EMULATION)
extern const sys_call_ptr_t ia32_sys_call_table[];
#endif
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons
* sign-extend the low 32 bits.
*/
static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
x86: remove the x32 syscall bitmask from syscall_get_nr() Commit fca460f95e928bae373daa8295877b6905bc62b8 simplified the x32 implementation by creating a syscall bitmask, equal to 0x40000000, that could be applied to x32 syscalls such that the masked syscall number would be the same as a x86_64 syscall. While that patch was a nice way to simplify the code, it went a bit too far by adding the mask to syscall_get_nr(); returning the masked syscall numbers can cause confusion with callers that expect syscall numbers matching the x32 ABI, e.g. unmasked syscall numbers. This patch fixes this by simply removing the mask from syscall_get_nr() while preserving the other changes from the original commit. While there are several syscall_get_nr() callers in the kernel, most simply check that the syscall number is greater than zero, in this case this patch will have no effect. Of those remaining callers, they appear to be few, seccomp and ftrace, and from my testing of seccomp without this patch the original commit definitely breaks things; the seccomp filter does not correctly filter the syscalls due to the difference in syscall numbers in the BPF filter and the value from syscall_get_nr(). Applying this patch restores the seccomp BPF filter functionality on x32. I've tested this patch with the seccomp BPF filters as well as ftrace and everything looks reasonable to me; needless to say general usage seemed fine as well. Signed-off-by: Paul Moore <pmoore@redhat.com> Link: http://lkml.kernel.org/r/20130215172143.12549.10292.stgit@localhost Cc: <stable@vger.kernel.org> Cc: Will Drewry <wad@chromium.org> Cc: H. Peter Anvin <hpa@zytor.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-02-15 17:21:43 +00:00
return regs->orig_ax;
}
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
x86: remove the x32 syscall bitmask from syscall_get_nr() Commit fca460f95e928bae373daa8295877b6905bc62b8 simplified the x32 implementation by creating a syscall bitmask, equal to 0x40000000, that could be applied to x32 syscalls such that the masked syscall number would be the same as a x86_64 syscall. While that patch was a nice way to simplify the code, it went a bit too far by adding the mask to syscall_get_nr(); returning the masked syscall numbers can cause confusion with callers that expect syscall numbers matching the x32 ABI, e.g. unmasked syscall numbers. This patch fixes this by simply removing the mask from syscall_get_nr() while preserving the other changes from the original commit. While there are several syscall_get_nr() callers in the kernel, most simply check that the syscall number is greater than zero, in this case this patch will have no effect. Of those remaining callers, they appear to be few, seccomp and ftrace, and from my testing of seccomp without this patch the original commit definitely breaks things; the seccomp filter does not correctly filter the syscalls due to the difference in syscall numbers in the BPF filter and the value from syscall_get_nr(). Applying this patch restores the seccomp BPF filter functionality on x32. I've tested this patch with the seccomp BPF filters as well as ftrace and everything looks reasonable to me; needless to say general usage seemed fine as well. Signed-off-by: Paul Moore <pmoore@redhat.com> Link: http://lkml.kernel.org/r/20130215172143.12549.10292.stgit@localhost Cc: <stable@vger.kernel.org> Cc: Will Drewry <wad@chromium.org> Cc: H. Peter Anvin <hpa@zytor.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2013-02-15 17:21:43 +00:00
regs->ax = regs->orig_ax;
}
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
unsigned long error = regs->ax;
#ifdef CONFIG_IA32_EMULATION
/*
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
*/
error = (long) (int) error;
#endif
return IS_ERR_VALUE(error) ? error : 0;
}
static inline long syscall_get_return_value(struct task_struct *task,
struct pt_regs *regs)
{
return regs->ax;
}
static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
regs->ax = (long) error ?: val;
}
#ifdef CONFIG_X86_32
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned long *args)
{
syscalls: Remove start and number from syscall_get_arguments() args At Linux Plumbers, Andy Lutomirski approached me and pointed out that the function call syscall_get_arguments() implemented in x86 was horribly written and not optimized for the standard case of passing in 0 and 6 for the starting index and the number of system calls to get. When looking at all the users of this function, I discovered that all instances pass in only 0 and 6 for these arguments. Instead of having this function handle different cases that are never used, simply rewrite it to return the first 6 arguments of a system call. This should help out the performance of tracing system calls by ptrace, ftrace and perf. Link: http://lkml.kernel.org/r/20161107213233.754809394@goodmis.org Cc: Oleg Nesterov <oleg@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dominik Brodowski <linux@dominikbrodowski.net> Cc: Dave Martin <dave.martin@arm.com> Cc: "Dmitry V. Levin" <ldv@altlinux.org> Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts Acked-by: Max Filippov <jcmvbkbc@gmail.com> # For xtensa changes Acked-by: Will Deacon <will.deacon@arm.com> # For the arm64 bits Reviewed-by: Thomas Gleixner <tglx@linutronix.de> # for x86 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org> Reported-by: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2016-11-07 21:26:37 +00:00
memcpy(args, &regs->bx, 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned int i, unsigned int n,
const unsigned long *args)
{
BUG_ON(i + n > 6);
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
{
return AUDIT_ARCH_I386;
}
#else /* CONFIG_X86_64 */
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
syscalls: Remove start and number from syscall_get_arguments() args At Linux Plumbers, Andy Lutomirski approached me and pointed out that the function call syscall_get_arguments() implemented in x86 was horribly written and not optimized for the standard case of passing in 0 and 6 for the starting index and the number of system calls to get. When looking at all the users of this function, I discovered that all instances pass in only 0 and 6 for these arguments. Instead of having this function handle different cases that are never used, simply rewrite it to return the first 6 arguments of a system call. This should help out the performance of tracing system calls by ptrace, ftrace and perf. Link: http://lkml.kernel.org/r/20161107213233.754809394@goodmis.org Cc: Oleg Nesterov <oleg@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dominik Brodowski <linux@dominikbrodowski.net> Cc: Dave Martin <dave.martin@arm.com> Cc: "Dmitry V. Levin" <ldv@altlinux.org> Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts Acked-by: Max Filippov <jcmvbkbc@gmail.com> # For xtensa changes Acked-by: Will Deacon <will.deacon@arm.com> # For the arm64 bits Reviewed-by: Thomas Gleixner <tglx@linutronix.de> # for x86 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org> Reported-by: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2016-11-07 21:26:37 +00:00
if (task->thread_info.status & TS_COMPAT) {
*args++ = regs->bx;
*args++ = regs->cx;
*args++ = regs->dx;
*args++ = regs->si;
*args++ = regs->di;
*args = regs->bp;
} else
# endif
syscalls: Remove start and number from syscall_get_arguments() args At Linux Plumbers, Andy Lutomirski approached me and pointed out that the function call syscall_get_arguments() implemented in x86 was horribly written and not optimized for the standard case of passing in 0 and 6 for the starting index and the number of system calls to get. When looking at all the users of this function, I discovered that all instances pass in only 0 and 6 for these arguments. Instead of having this function handle different cases that are never used, simply rewrite it to return the first 6 arguments of a system call. This should help out the performance of tracing system calls by ptrace, ftrace and perf. Link: http://lkml.kernel.org/r/20161107213233.754809394@goodmis.org Cc: Oleg Nesterov <oleg@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dominik Brodowski <linux@dominikbrodowski.net> Cc: Dave Martin <dave.martin@arm.com> Cc: "Dmitry V. Levin" <ldv@altlinux.org> Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts Acked-by: Max Filippov <jcmvbkbc@gmail.com> # For xtensa changes Acked-by: Will Deacon <will.deacon@arm.com> # For the arm64 bits Reviewed-by: Thomas Gleixner <tglx@linutronix.de> # for x86 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org> Reported-by: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2016-11-07 21:26:37 +00:00
{
*args++ = regs->di;
*args++ = regs->si;
*args++ = regs->dx;
*args++ = regs->r10;
*args++ = regs->r8;
*args = regs->r9;
}
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
syscalls: Remove start and number from syscall_set_arguments() args After removing the start and count arguments of syscall_get_arguments() it seems reasonable to remove them from syscall_set_arguments(). Note, as of today, there are no users of syscall_set_arguments(). But we are told that there will be soon. But for now, at least make it consistent with syscall_get_arguments(). Link: http://lkml.kernel.org/r/20190327222014.GA32540@altlinux.org Cc: Oleg Nesterov <oleg@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dominik Brodowski <linux@dominikbrodowski.net> Cc: Dave Martin <dave.martin@arm.com> Cc: "Dmitry V. Levin" <ldv@altlinux.org> Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Max Filippov <jcmvbkbc@gmail.com> # For xtensa changes Acked-by: Will Deacon <will.deacon@arm.com> # For the arm64 bits Reviewed-by: Thomas Gleixner <tglx@linutronix.de> # for x86 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-03-28 00:07:31 +00:00
if (task->thread_info.status & TS_COMPAT) {
regs->bx = *args++;
regs->cx = *args++;
regs->dx = *args++;
regs->si = *args++;
regs->di = *args++;
regs->bp = *args;
} else
# endif
syscalls: Remove start and number from syscall_set_arguments() args After removing the start and count arguments of syscall_get_arguments() it seems reasonable to remove them from syscall_set_arguments(). Note, as of today, there are no users of syscall_set_arguments(). But we are told that there will be soon. But for now, at least make it consistent with syscall_get_arguments(). Link: http://lkml.kernel.org/r/20190327222014.GA32540@altlinux.org Cc: Oleg Nesterov <oleg@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dominik Brodowski <linux@dominikbrodowski.net> Cc: Dave Martin <dave.martin@arm.com> Cc: "Dmitry V. Levin" <ldv@altlinux.org> Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Max Filippov <jcmvbkbc@gmail.com> # For xtensa changes Acked-by: Will Deacon <will.deacon@arm.com> # For the arm64 bits Reviewed-by: Thomas Gleixner <tglx@linutronix.de> # for x86 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-03-28 00:07:31 +00:00
{
regs->di = *args++;
regs->si = *args++;
regs->dx = *args++;
regs->r10 = *args++;
regs->r8 = *args++;
regs->r9 = *args;
}
}
static inline int syscall_get_arch(void)
{
/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_SYSCALL_H */