diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst index dabee3729e5a..56490c4c0c07 100644 --- a/Documentation/powerpc/syscall64-abi.rst +++ b/Documentation/powerpc/syscall64-abi.rst @@ -109,6 +109,16 @@ auxiliary vector. scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC. +ptrace +------ +When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains +the system call type that can be used to distinguish between sc and scv 0 +system calls, and the different register conventions can be accounted for. + +If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was +performed with the sc instruction, if it is 0x3000 then the system call was +performed with the scv 0 instruction. + vsyscall ======== diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 2d5c6bec2b4f..93ce3ec25387 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -50,7 +50,7 @@ l_yes: 1098: nop; \ .pushsection __jump_table, "aw"; \ .long 1098b - ., LABEL - .; \ - FTR_ENTRY_LONG KEY; \ + FTR_ENTRY_LONG KEY - .; \ .popsection #endif diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 33fa5dd8ee6a..714a35f0d425 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) pgd_t *pgdir = init_mm.pgd; return __find_linux_pte(pgdir, ea, NULL, hshift); } + +/* + * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a + * physical address, without taking locks. This can be used in real-mode. + */ +static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr) +{ + pte_t *ptep; + phys_addr_t pa; + int hugepage_shift; + + /* + * init_mm does not free page tables, and does not do THP. It may + * have huge pages from huge vmalloc / ioremap etc. + */ + ptep = find_init_mm_pte(addr, &hugepage_shift); + if (WARN_ON(!ptep)) + return 0; + + pa = PFN_PHYS(pte_pfn(*ptep)); + + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + + pa |= addr & ((1ul << hugepage_shift) - 1); + + return pa; +} + /* * This is what we should always use. Any other lockless page table lookup needs * careful audit against THP split. diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 4fd3a3bd5272..e06d61b668d4 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -19,6 +19,7 @@ #ifndef _ASM_POWERPC_PTRACE_H #define _ASM_POWERPC_PTRACE_H +#include #include #include @@ -153,25 +154,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); long do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -#define kernel_stack_pointer(regs) ((regs)->gpr[1]) -static inline int is_syscall_success(struct pt_regs *regs) -{ - return !(regs->ccr & 0x10000000); -} - -static inline long regs_return_value(struct pt_regs *regs) -{ - if (is_syscall_success(regs)) - return regs->gpr[3]; - else - return -regs->gpr[3]; -} - -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->gpr[3] = rc; -} - #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else @@ -236,6 +218,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs) regs->trap |= 0x1; } +#define kernel_stack_pointer(regs) ((regs)->gpr[1]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return !IS_ERR_VALUE((unsigned long)regs->gpr[3]); + else + return !(regs->ccr & 0x10000000); +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return regs->gpr[3]; + + if (is_syscall_success(regs)) + return regs->gpr[3]; + else + return -regs->gpr[3]; +} + +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gpr[3] = rc; +} + #define arch_has_single_step() (1) #define arch_has_block_step() (true) #define ARCH_HAS_USER_SINGLE_STEP_REPORT diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index fd1b518eed17..ba0f88f3a30d 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - /* - * If the system call failed, - * regs->gpr[3] contains a positive ERRORCODE. - */ - return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + if (trap_is_scv(regs)) { + unsigned long error = regs->gpr[3]; + + return IS_ERR_VALUE(error) ? error : 0; + } else { + /* + * If the system call failed, + * regs->gpr[3] contains a positive ERRORCODE. + */ + return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + } } static inline long syscall_get_return_value(struct task_struct *task, @@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - /* - * In the general case it's not obvious that we must deal with CCR - * here, as the syscall exit path will also do that for us. However - * there are some places, eg. the signal code, which check ccr to - * decide if the value in r3 is actually an error. - */ - if (error) { - regs->ccr |= 0x10000000L; - regs->gpr[3] = error; + if (trap_is_scv(regs)) { + regs->gpr[3] = (long) error ?: val; } else { - regs->ccr &= ~0x10000000L; - regs->gpr[3] = val; + /* + * In the general case it's not obvious that we must deal with + * CCR here, as the syscall exit path will also do that for us. + * However there are some places, eg. the signal code, which + * check ccr to decide if the value in r3 is actually an error. + */ + if (error) { + regs->ccr |= 0x10000000L; + regs->gpr[3] = error; + } else { + regs->ccr &= ~0x10000000L; + regs->gpr[3] = val; + } } } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f24cd53ff26e..3bbdcc86d01b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) */ static inline unsigned long eeh_token_to_phys(unsigned long token) { - pte_t *ptep; - unsigned long pa; - int hugepage_shift; - - /* - * We won't find hugepages here(this is iomem). Hence we are not - * worried about _PAGE_SPLITTING/collapse. Also we will not hit - * page table free, because of init_mm. - */ - ptep = find_init_mm_pte(token, &hugepage_shift); - if (!ptep) - return token; - - pa = pte_pfn(*ptep); - - /* On radix we can do hugepage mappings for io, so handle that */ - if (!hugepage_shift) - hugepage_shift = PAGE_SHIFT; - - pa <<= PAGE_SHIFT; - pa |= token & ((1ul << hugepage_shift) - 1); - return pa; + return ppc_find_vmap_phys(token); } /* diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 51bbaae94ccc..c877f074d174 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) #ifdef CONFIG_PPC_INDIRECT_MMIO struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { - unsigned hugepage_shift; struct iowa_bus *bus; int token; @@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) bus = &iowa_busses[token - 1]; else { unsigned long vaddr, paddr; - pte_t *ptep; vaddr = (unsigned long)PCI_FIX_ADDR(addr); if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) return NULL; - /* - * We won't find huge pages here (iomem). Also can't hit - * a page table free due to init_mm - */ - ptep = find_init_mm_pte(vaddr, &hugepage_shift); - if (ptep == NULL) - paddr = 0; - else { - WARN_ON(hugepage_shift); - paddr = pte_pfn(*ptep) << PAGE_SHIFT; - } + + paddr = ppc_find_vmap_phys(vaddr); + bus = iowa_pci_find(vaddr, paddr); if (bus == NULL) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 57d6b85e9b96..2af89a5e379f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; - size_t size_io = size; size = PAGE_ALIGN(size); order = get_order(size); @@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - size_io = IOMMU_PAGE_ALIGN(size_io, tbl); - nio_pages = size_io >> tbl->it_page_shift; - io_order = get_iommu_order(size_io, tbl); + nio_pages = size >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_MAPPING_ERROR) { @@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { if (tbl) { - size_t size_io = IOMMU_PAGE_ALIGN(size, tbl); - unsigned int nio_pages = size_io >> tbl->it_page_shift; + unsigned int nio_pages; + size = PAGE_ALIGN(size); + nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 01ab2163659e..e8c2a6373157 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -108,7 +108,6 @@ int arch_prepare_kprobe(struct kprobe *p) int ret = 0; struct kprobe *prev; struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); - struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1)); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -116,7 +115,8 @@ int arch_prepare_kprobe(struct kprobe *p) } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; - } else if (ppc_inst_prefixed(prefix)) { + } else if ((unsigned long)p->addr & ~PAGE_MASK && + ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b779d25761cf..e42b85e4f1aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr) apply_feature_fixups(); setup_feature_keys(); - early_ioremap_setup(); - /* Initialize the hash table or TLB handling */ early_init_mmu(); + early_ioremap_setup(); + /* * After firmware and early platform setup code has set things up, * we note the SPR values for configurable control/performance diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index dca66481d0c2..f9e1f5428b9e 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); user_write_access_end(); + /* Save the siginfo outside of the unsafe block. */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + /* Make sure signal handler doesn't get spurious FP exceptions */ tsk->thread.fp_state.fpscr = 0; @@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->nip = (unsigned long) &frame->tramp[0]; } - - /* Save the siginfo outside of the unsafe block. */ - if (copy_siginfo_to_user(&frame->info, &ksig->info)) - goto badframe; - /* Allocate a dummy caller frame for the signal handler. */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 28a80d240b76..13728495ac66 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4455,7 +4455,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) mtspr(SPRN_EBBRR, ebb_regs[1]); mtspr(SPRN_BESCR, ebb_regs[2]); mtspr(SPRN_TAR, user_tar); - mtspr(SPRN_FSCR, current->thread.fscr); } mtspr(SPRN_VRSAVE, user_vrsave); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 7af7c70f1468..7a0f12404e0e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -23,20 +23,9 @@ #include /* Translate address of a vmalloc'd thing to a linear map address */ -static void *real_vmalloc_addr(void *x) +static void *real_vmalloc_addr(void *addr) { - unsigned long addr = (unsigned long) x; - pte_t *p; - /* - * assume we don't have huge pages in vmalloc space... - * So don't worry about THP collapse/split. Called - * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. - */ - p = find_init_mm_pte(addr, NULL); - if (!p || !pte_present(*p)) - return NULL; - addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); - return __va(addr); + return __va(ppc_find_vmap_phys((unsigned long)addr)); } /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5e634db4809b..004f0d4e665f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -59,6 +59,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_UAMOR (SFS-88) #define STACK_SLOT_DAWR1 (SFS-96) #define STACK_SLOT_DAWRX1 (SFS-104) +#define STACK_SLOT_FSCR (SFS-112) /* the following is used by the P9 short path */ #define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ @@ -686,6 +687,8 @@ BEGIN_FTR_SECTION std r6, STACK_SLOT_DAWR0(r1) std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) + mfspr r5, SPRN_FSCR + std r5, STACK_SLOT_FSCR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION mfspr r6, SPRN_DAWR1 @@ -1663,6 +1666,10 @@ FTR_SECTION_ELSE ld r7, STACK_SLOT_HFSCR(r1) mtspr SPRN_HFSCR, r7 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_FSCR(r1) + mtspr SPRN_FSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 043bbeaf407c..a6b36a40897a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 98c3b647f54d..e3d5c77a8612 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1753,16 +1753,25 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define SYSCALL_RET_SET(_regs, _val) \ do { \ typeof(_val) _result = (_val); \ - /* \ - * A syscall error is signaled by CR0 SO bit \ - * and the code is stored as a positive value. \ - */ \ - if (_result < 0) { \ - SYSCALL_RET(_regs) = -_result; \ - (_regs).ccr |= 0x10000000; \ - } else { \ + if ((_regs.trap & 0xfff0) == 0x3000) { \ + /* \ + * scv 0 system call uses -ve result \ + * for error, so no need to adjust. \ + */ \ SYSCALL_RET(_regs) = _result; \ - (_regs).ccr &= ~0x10000000; \ + } else { \ + /* \ + * A syscall error is signaled by the \ + * CR0 SO bit and the code is stored as \ + * a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -_result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = _result; \ + (_regs).ccr &= ~0x10000000; \ + } \ } \ } while (0) # define SYSCALL_RET_SET_ON_PTRACE_EXIT