diff --git a/Documentation/core-api/protection-keys.rst b/Documentation/core-api/protection-keys.rst index bf28ac0401f3..7eb7c6023e09 100644 --- a/Documentation/core-api/protection-keys.rst +++ b/Documentation/core-api/protection-keys.rst @@ -12,7 +12,10 @@ Pkeys Userspace (PKU) is a feature which can be found on: * Intel server CPUs, Skylake and later * Intel client CPUs, Tiger Lake (11th Gen Core) and later * Future AMD CPUs + * arm64 CPUs implementing the Permission Overlay Extension (FEAT_S1POE) +x86_64 +====== Pkeys work by dedicating 4 previously Reserved bits in each page table entry to a "protection key", giving 16 possible keys. @@ -28,6 +31,22 @@ register. The feature is only available in 64-bit mode, even though there is theoretically space in the PAE PTEs. These permissions are enforced on data access only and have no effect on instruction fetches. +arm64 +===== + +Pkeys use 3 bits in each page table entry, to encode a "protection key index", +giving 8 possible keys. + +Protections for each key are defined with a per-CPU user-writable system +register (POR_EL0). This is a 64-bit register encoding read, write and execute +overlay permissions for each protection key index. + +Being a CPU register, POR_EL0 is inherently thread-local, potentially giving +each thread a different set of protections from every other thread. + +Unlike x86_64, the protection key permissions also apply to instruction +fetches. + Syscalls ======== @@ -38,11 +57,10 @@ There are 3 system calls which directly interact with pkeys:: int pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); -Before a pkey can be used, it must first be allocated with -pkey_alloc(). An application calls the WRPKRU instruction -directly in order to change access permissions to memory covered -with a key. In this example WRPKRU is wrapped by a C function -called pkey_set(). +Before a pkey can be used, it must first be allocated with pkey_alloc(). An +application writes to the architecture specific CPU register directly in order +to change access permissions to memory covered with a key. In this example +this is wrapped by a C function called pkey_set(). :: int real_prot = PROT_READ|PROT_WRITE; @@ -64,9 +82,9 @@ is no longer in use:: munmap(ptr, PAGE_SIZE); pkey_free(pkey); -.. note:: pkey_set() is a wrapper for the RDPKRU and WRPKRU instructions. - An example implementation can be found in - tools/testing/selftests/x86/protection_keys.c. +.. note:: pkey_set() is a wrapper around writing to the CPU register. + Example implementations can be found in + tools/testing/selftests/mm/pkey-{arm64,powerpc,x86}.h Behavior ======== @@ -96,3 +114,7 @@ with a read():: The kernel will send a SIGSEGV in both cases, but si_code will be set to SEGV_PKERR when violating protection keys versus SEGV_ACCERR when the plain mprotect() permissions are violated. + +Note that kernel accesses from a kthread (such as io_uring) will use a default +value for the protection key register and so will not be consistent with +userspace's value of the register or mprotect(). diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 60bebb3e7d48..2968a33bb3bc 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -466,6 +466,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.cpu_context.x19 = (unsigned long)args->fn; p->thread.cpu_context.x20 = (unsigned long)args->fn_arg; + + if (system_supports_poe()) + p->thread.por_el0 = POR_EL0_INIT; } p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 2eb2e97a934f..14ac6fdb872b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -72,10 +73,62 @@ struct rt_sigframe_user_layout { unsigned long end_offset; }; -#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16) +/* + * Holds any EL0-controlled state that influences unprivileged memory accesses. + * This includes both accesses done in userspace and uaccess done in the kernel. + * + * This state needs to be carefully managed to ensure that it doesn't cause + * uaccess to fail when setting up the signal frame, and the signal handler + * itself also expects a well-defined state when entered. + */ +struct user_access_state { + u64 por_el0; +}; + #define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) #define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) +/* + * Save the user access state into ua_state and reset it to disable any + * restrictions. + */ +static void save_reset_user_access_state(struct user_access_state *ua_state) +{ + if (system_supports_poe()) { + u64 por_enable_all = 0; + + for (int pkey = 0; pkey < arch_max_pkey(); pkey++) + por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY); + + ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); + write_sysreg_s(por_enable_all, SYS_POR_EL0); + /* Ensure that any subsequent uaccess observes the updated value */ + isb(); + } +} + +/* + * Set the user access state for invoking the signal handler. + * + * No uaccess should be done after that function is called. + */ +static void set_handler_user_access_state(void) +{ + if (system_supports_poe()) + write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); +} + +/* + * Restore the user access state to the values saved in ua_state. + * + * No uaccess should be done after that function is called. + */ +static void restore_user_access_state(const struct user_access_state *ua_state) +{ + if (system_supports_poe()) + write_sysreg_s(ua_state->por_el0, SYS_POR_EL0); +} + static void init_user_layout(struct rt_sigframe_user_layout *user) { const size_t reserved_size = @@ -269,18 +322,20 @@ static int restore_fpmr_context(struct user_ctxs *user) return err; } -static int preserve_poe_context(struct poe_context __user *ctx) +static int preserve_poe_context(struct poe_context __user *ctx, + const struct user_access_state *ua_state) { int err = 0; __put_user_error(POE_MAGIC, &ctx->head.magic, err); __put_user_error(sizeof(*ctx), &ctx->head.size, err); - __put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err); + __put_user_error(ua_state->por_el0, &ctx->por_el0, err); return err; } -static int restore_poe_context(struct user_ctxs *user) +static int restore_poe_context(struct user_ctxs *user, + struct user_access_state *ua_state) { u64 por_el0; int err = 0; @@ -290,7 +345,7 @@ static int restore_poe_context(struct user_ctxs *user) __get_user_error(por_el0, &(user->poe->por_el0), err); if (!err) - write_sysreg_s(por_el0, SYS_POR_EL0); + ua_state->por_el0 = por_el0; return err; } @@ -946,7 +1001,8 @@ invalid: } static int restore_sigframe(struct pt_regs *regs, - struct rt_sigframe __user *sf) + struct rt_sigframe __user *sf, + struct user_access_state *ua_state) { sigset_t set; int i, err; @@ -998,7 +1054,7 @@ static int restore_sigframe(struct pt_regs *regs, err = restore_zt_context(&user); if (err == 0 && system_supports_poe() && user.poe) - err = restore_poe_context(&user); + err = restore_poe_context(&user, ua_state); return err; } @@ -1059,6 +1115,7 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; + struct user_access_state ua_state; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -1075,7 +1132,7 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(frame, sizeof (*frame))) goto badframe; - if (restore_sigframe(regs, frame)) + if (restore_sigframe(regs, frame, &ua_state)) goto badframe; if (gcs_restore_signal()) @@ -1084,6 +1141,8 @@ SYSCALL_DEFINE0(rt_sigreturn) if (restore_altstack(&frame->uc.uc_stack)) goto badframe; + restore_user_access_state(&ua_state); + return regs->regs[0]; badframe: @@ -1198,7 +1257,8 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, } static int setup_sigframe(struct rt_sigframe_user_layout *user, - struct pt_regs *regs, sigset_t *set) + struct pt_regs *regs, sigset_t *set, + const struct user_access_state *ua_state) { int i, err = 0; struct rt_sigframe __user *sf = user->sigframe; @@ -1262,14 +1322,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_fpmr_context(fpmr_ctx); } - if (system_supports_poe() && err == 0 && user->poe_offset) { + if (system_supports_poe() && err == 0) { struct poe_context __user *poe_ctx = apply_user_offset(user, user->poe_offset); - err |= preserve_poe_context(poe_ctx); + err |= preserve_poe_context(poe_ctx, ua_state); } - /* ZA state if present */ if (system_supports_sme() && err == 0 && user->za_offset) { struct za_context __user *za_ctx = @@ -1447,9 +1506,6 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, sme_smstop(); } - if (system_supports_poe()) - write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); - if (ksig->ka.sa.sa_flags & SA_RESTORER) sigtramp = ksig->ka.sa.sa_restorer; else @@ -1465,6 +1521,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, { struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; + struct user_access_state ua_state; int err = 0; fpsimd_signal_preserve_current_state(); @@ -1472,13 +1529,14 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, if (get_sigframe(&user, ksig, regs)) return 1; + save_reset_user_access_state(&ua_state); frame = user.sigframe; __put_user_error(0, &frame->uc.uc_flags, err); __put_user_error(NULL, &frame->uc.uc_link, err); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigframe(&user, regs, set); + err |= setup_sigframe(&user, regs, set, &ua_state); if (err == 0) { err = setup_return(regs, ksig, &user, usig); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { @@ -1488,6 +1546,11 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, } } + if (err == 0) + set_handler_user_access_state(); + else + restore_user_access_state(&ua_state); + return err; } diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 285fe7ad490d..3e8051fe8296 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -763,7 +763,7 @@ static int sdei_device_freeze(struct device *dev) int err; /* unregister private events */ - cpuhp_remove_state(sdei_entry_point); + cpuhp_remove_state(sdei_hp_state); err = sdei_unregister_shared(); if (err) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f805adaa316e..cd6f9aae311f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -80,7 +80,11 @@ #define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif +#ifdef __SANITIZE_HWADDRESS__ +#define __no_sanitize_address __attribute__((__no_sanitize__("hwaddress"))) +#else #define __no_sanitize_address __attribute__((__no_sanitize_address__)) +#endif #if defined(__SANITIZE_THREAD__) #define __no_sanitize_thread __attribute__((__no_sanitize_thread__)) diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 02e1204971b0..0f8c110e0805 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -105,12 +105,12 @@ endif ifeq ($(CAN_BUILD_X86_64),1) TEST_GEN_FILES += $(BINARIES_64) endif -else -ifneq (,$(filter $(ARCH),arm64 powerpc)) +else ifeq ($(ARCH),arm64) +TEST_GEN_FILES += protection_keys +TEST_GEN_FILES += pkey_sighandler_tests +else ifeq ($(ARCH),powerpc) TEST_GEN_FILES += protection_keys -endif - endif ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h index 580e1b0bb38e..d9d2100eafc0 100644 --- a/tools/testing/selftests/mm/pkey-arm64.h +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -31,6 +31,7 @@ #define NR_RESERVED_PKEYS 1 /* pkey-0 */ #define PKEY_ALLOW_ALL 0x77777777 +#define PKEY_REG_ALLOW_NONE 0x0 #define PKEY_BITS_PER_PKEY 4 #define PAGE_SIZE sysconf(_SC_PAGESIZE) @@ -126,7 +127,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey) return 0; } -static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) +static inline void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) { struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt); struct poe_context *poe_ctx = diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 9ab6a3ee153b..f7cfe163b0ff 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -112,6 +112,13 @@ void record_pkey_malloc(void *ptr, long size, int prot); #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) #endif +/* + * FIXME: Remove once the generic PKEY_UNRESTRICTED definition is merged. + */ +#ifndef PKEY_UNRESTRICTED +#define PKEY_UNRESTRICTED 0x0 +#endif + #ifndef set_pkey_bits static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) { diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 5f28e26a2511..ac91777c8917 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -34,6 +34,8 @@ #define PAGE_SIZE 4096 #define MB (1<<20) +#define PKEY_REG_ALLOW_NONE 0x55555555 + static inline void __page_o_noops(void) { /* 8-bytes of instruction * 512 bytes = 1 page */ diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index a8088b645ad6..c593a426341c 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -11,6 +11,7 @@ */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ +#include #include #include #include @@ -59,12 +60,58 @@ long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) : "=a"(ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory"); +#elif defined __aarch64__ + register long x0 asm("x0") = a1; + register long x1 asm("x1") = a2; + register long x2 asm("x2") = a3; + register long x3 asm("x3") = a4; + register long x4 asm("x4") = a5; + register long x5 asm("x5") = a6; + register long x8 asm("x8") = n; + asm volatile ("svc #0" + : "=r"(x0) + : "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(x8) + : "memory"); + ret = x0; #else # error syscall_raw() not implemented #endif return ret; } +static inline long clone_raw(unsigned long flags, void *stack, + int *parent_tid, int *child_tid) +{ + long a1 = flags; + long a2 = (long)stack; + long a3 = (long)parent_tid; +#if defined(__x86_64__) || defined(__i386) + long a4 = (long)child_tid; + long a5 = 0; +#elif defined(__aarch64__) + long a4 = 0; + long a5 = (long)child_tid; +#else +# error clone_raw() not implemented +#endif + + return syscall_raw(SYS_clone, a1, a2, a3, a4, a5, 0); +} + +/* + * Returns the most restrictive pkey register value that can be used by the + * tests. + */ +static inline u64 pkey_reg_restrictive_default(void) +{ + /* + * Disallow everything except execution on pkey 0, so that each caller + * doesn't need to enable it explicitly (the selftest code runs with + * its code mapped with pkey 0). + */ + return set_pkey_bits(PKEY_REG_ALLOW_NONE, 0, PKEY_DISABLE_ACCESS); +} + static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext) { pthread_mutex_lock(&mutex); @@ -113,7 +160,7 @@ static void raise_sigusr2(void) static void *thread_segv_with_pkey0_disabled(void *ptr) { /* Disable MPK 0 (and all others too) */ - __write_pkey_reg(0x55555555); + __write_pkey_reg(pkey_reg_restrictive_default()); /* Segfault (with SEGV_MAPERR) */ *(int *) (0x1) = 1; @@ -123,7 +170,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr) static void *thread_segv_pkuerr_stack(void *ptr) { /* Disable MPK 0 (and all others too) */ - __write_pkey_reg(0x55555555); + __write_pkey_reg(pkey_reg_restrictive_default()); /* After we disable MPK 0, we can't access the stack to return */ return NULL; @@ -133,6 +180,7 @@ static void *thread_segv_maperr_ptr(void *ptr) { stack_t *stack = ptr; int *bad = (int *)1; + u64 pkey_reg; /* * Setup alternate signal stack, which should be pkey_mprotect()ed by @@ -142,7 +190,9 @@ static void *thread_segv_maperr_ptr(void *ptr) syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); /* Disable MPK 0. Only MPK 1 is enabled. */ - __write_pkey_reg(0x55555551); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Segfault */ *bad = 1; @@ -240,6 +290,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) int pkey; int parent_pid = 0; int child_pid = 0; + u64 pkey_reg; sa.sa_flags = SA_SIGINFO | SA_ONSTACK; @@ -257,7 +308,10 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) assert(stack != MAP_FAILED); /* Allow access to MPK 0 and MPK 1 */ - __write_pkey_reg(0x55555550); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Protect the new stack with MPK 1 */ pkey = pkey_alloc(0, 0); @@ -272,14 +326,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) memset(&siginfo, 0, sizeof(siginfo)); /* Use clone to avoid newer glibcs using rseq on new threads */ - long ret = syscall_raw(SYS_clone, - CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | - CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | - CLONE_DETACHED, - (long) ((char *)(stack) + STACK_SIZE), - (long) &parent_pid, - (long) &child_pid, 0, 0); + long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + stack + STACK_SIZE, + &parent_pid, + &child_pid); if (ret < 0) { errno = -ret; @@ -307,7 +360,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) static void test_pkru_preserved_after_sigusr1(void) { struct sigaction sa; - unsigned long pkru = 0x45454544; + u64 pkey_reg; + + /* Allow access to MPK 0 and an arbitrary set of keys */ + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 3, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 7, PKEY_UNRESTRICTED); sa.sa_flags = SA_SIGINFO; @@ -320,7 +379,7 @@ static void test_pkru_preserved_after_sigusr1(void) memset(&siginfo, 0, sizeof(siginfo)); - __write_pkey_reg(pkru); + __write_pkey_reg(pkey_reg); raise(SIGUSR1); @@ -330,7 +389,7 @@ static void test_pkru_preserved_after_sigusr1(void) pthread_mutex_unlock(&mutex); /* Ensure the pkru value is the same after returning from signal. */ - ksft_test_result(pkru == __read_pkey_reg() && + ksft_test_result(pkey_reg == __read_pkey_reg() && siginfo.si_signo == SIGUSR1, "%s\n", __func__); } @@ -347,6 +406,7 @@ static noinline void *thread_sigusr2_self(void *ptr) 'S', 'I', 'G', 'U', 'S', 'R', '2', '.', '.', '.', '\n', '\0'}; stack_t *stack = ptr; + u64 pkey_reg; /* * Setup alternate signal stack, which should be pkey_mprotect()ed by @@ -356,7 +416,9 @@ static noinline void *thread_sigusr2_self(void *ptr) syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); /* Disable MPK 0. Only MPK 2 is enabled. */ - __write_pkey_reg(0x55555545); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); raise_sigusr2(); @@ -384,6 +446,7 @@ static void test_pkru_sigreturn(void) int pkey; int parent_pid = 0; int child_pid = 0; + u64 pkey_reg; sa.sa_handler = SIG_DFL; sa.sa_flags = 0; @@ -418,7 +481,10 @@ static void test_pkru_sigreturn(void) * the current thread's stack is protected by the default MPK 0. Hence * both need to be enabled. */ - __write_pkey_reg(0x55555544); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Protect the stack with MPK 2 */ pkey = pkey_alloc(0, 0); @@ -431,14 +497,13 @@ static void test_pkru_sigreturn(void) sigstack.ss_size = STACK_SIZE; /* Use clone to avoid newer glibcs using rseq on new threads */ - long ret = syscall_raw(SYS_clone, - CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | - CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | - CLONE_DETACHED, - (long) ((char *)(stack) + STACK_SIZE), - (long) &parent_pid, - (long) &child_pid, 0, 0); + long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + stack + STACK_SIZE, + &parent_pid, + &child_pid); if (ret < 0) { errno = -ret;