forked from Minki/linux
powerpc fixes for 4.12 #7
- three fixes for kprobes/ftrace/livepatch interactions. - properly handle data breakpoints when using the Radix MMU. - fix for perf sampling of registers during call_usermodehelper(). - properly initialise the thread_info on our emergency stacks - add an explicit flush when doing TLB invalidations for a process using NPU2. Thanks to: Alistair Popple, Naveen N. Rao, Nicholas Piggin, Ravi Bangoria, Masami Hiramatsu. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJZTZy4AAoJEFHr6jzI4aWA9CYQAK+BIZ2wM+QEKDWUc7bHUBfJ kVkFr59VS4x9w2zL2fKijy3CTNqaEXCUhmCks7PFYxGfF437YaJGVfCBVotuY9Ce SKTkJujUUf7b1zN+lKz8d9u6AKomE9rYBLpR0LPhDrnpiLbHtyWCeFWsmOB63k4E 05EwIHGAlvIC/dc6bHoeJzSLT5agK2KcCVWjgVzZgkDi7sbYkE8qhPmo/cojSERo 48+o8beAKgU3YEI8OwraxYBlUR71DKfdL7+6xvEo8kVNj5iNMq5GWY+YLvcQgR50 3MLuGxWFZWVRfZY8rrLMajFxNXojwuWuLu/PTT0Kz2ZRgLseF+op0AH2Ezsw4pnZ CLp0sSKs9BqpwKuFCb1lHiEVnGfOb9CFy3u0nWmQjsE0Bj8HRC433x4fNQcJVUmJ ZMPXRtZaboPV9jt3UoUhtancMiXdAbTP48N7klFRuVwCOycnxW5yAFkCssFaSpsn EAidzBDODUXUV6/3paNVsZD7ehVJ/FMBgKSyAoJrcr+RZeFbn4b9m/NvdpdhQIwn iGrTMhz3YmEhxiZrStYB9aaeaaWKZxd120bnTcfFEcnMOCKUkBSICtqjGLVsBO5e rQV9P97h+kxf+Wh7DqhkC7br7URpYsYDZa9bCd+SAL1qrGeNZW/RP01ABRZWiSi4 0QVvKZ7uVzyEHIVHXOoj =a2Ax -----END PGP SIGNATURE----- Merge tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux Pull powerpc fixes from Michael Ellerman: "Some more powerpc fixes for 4.12. Most of these actually came in last week but got held up for some more testing. - three fixes for kprobes/ftrace/livepatch interactions. - properly handle data breakpoints when using the Radix MMU. - fix for perf sampling of registers during call_usermodehelper(). - properly initialise the thread_info on our emergency stacks - add an explicit flush when doing TLB invalidations for a process using NPU2. Thanks to: Alistair Popple, Naveen N. 
Rao, Nicholas Piggin, Ravi Bangoria, Masami Hiramatsu" * tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: powerpc/64: Initialise thread_info for emergency stacks powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD powerpc/perf: Fix oops when kthread execs user process powerpc/64s: Handle data breakpoints in Radix mode powerpc/kprobes: Skip livepatch_handler() for jprobes powerpc/ftrace: Pass the correct stack pointer for DYNAMIC_FTRACE_WITH_REGS powerpc/kprobes: Pause function_graph tracing during jprobes handling
This commit is contained in:
commit
94a6df251d
@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
|
||||
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
|
||||
extern int kprobe_handler(struct pt_regs *regs);
|
||||
extern int kprobe_post_handler(struct pt_regs *regs);
|
||||
extern int is_current_kprobe_addr(unsigned long addr);
|
||||
#ifdef CONFIG_KPROBES_ON_FTRACE
|
||||
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
|
||||
struct kprobe_ctlblk *kcb);
|
||||
|
@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
|
||||
.balign IFETCH_ALIGN_BYTES
|
||||
do_hash_page:
|
||||
#ifdef CONFIG_PPC_STD_MMU_64
|
||||
andis. r0,r4,0xa410 /* weird error? */
|
||||
andis. r0,r4,0xa450 /* weird error? */
|
||||
bne- handle_page_fault /* if not, try to insert a HPTE */
|
||||
andis. r0,r4,DSISR_DABRMATCH@h
|
||||
bne- handle_dabr_fault
|
||||
CURRENT_THREAD_INFO(r11, r1)
|
||||
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
|
||||
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
|
||||
@ -1438,11 +1436,16 @@ do_hash_page:
|
||||
|
||||
/* Error */
|
||||
blt- 13f
|
||||
|
||||
/* Reload DSISR into r4 for the DABR check below */
|
||||
ld r4,_DSISR(r1)
|
||||
#endif /* CONFIG_PPC_STD_MMU_64 */
|
||||
|
||||
/* Here we have a page fault that hash_page can't handle. */
|
||||
handle_page_fault:
|
||||
11: ld r4,_DAR(r1)
|
||||
11: andis. r0,r4,DSISR_DABRMATCH@h
|
||||
bne- handle_dabr_fault
|
||||
ld r4,_DAR(r1)
|
||||
ld r5,_DSISR(r1)
|
||||
addi r3,r1,STACK_FRAME_OVERHEAD
|
||||
bl do_page_fault
|
||||
|
@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
|
||||
|
||||
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
|
||||
|
||||
int is_current_kprobe_addr(unsigned long addr)
|
||||
{
|
||||
struct kprobe *p = kprobe_running();
|
||||
return (p && (unsigned long)p->addr == addr) ? 1 : 0;
|
||||
}
|
||||
|
||||
bool arch_within_kprobe_blacklist(unsigned long addr)
|
||||
{
|
||||
return (addr >= (unsigned long)__kprobes_text_start &&
|
||||
@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* jprobes use jprobe_return() which skips the normal return
|
||||
* path of the function, and this messes up the accounting of the
|
||||
* function graph tracer.
|
||||
*
|
||||
* Pause function graph tracing while performing the jprobe function.
|
||||
*/
|
||||
pause_graph_tracing();
|
||||
|
||||
return 1;
|
||||
}
|
||||
NOKPROBE_SYMBOL(setjmp_pre_handler);
|
||||
@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
* saved regs...
|
||||
*/
|
||||
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
|
||||
/* It's OK to start function graph tracing again */
|
||||
unpause_graph_tracing();
|
||||
preempt_enable_no_resched();
|
||||
return 1;
|
||||
}
|
||||
|
@ -615,6 +615,24 @@ void __init exc_lvl_early_init(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Emergency stacks are used for a range of things, from asynchronous
|
||||
* NMIs (system reset, machine check) to synchronous, process context.
|
||||
* We set preempt_count to zero, even though that isn't necessarily correct. To
|
||||
* get the right value we'd need to copy it from the previous thread_info, but
|
||||
* doing that might fault causing more problems.
|
||||
* TODO: what to do with accounting?
|
||||
*/
|
||||
static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
|
||||
{
|
||||
ti->task = NULL;
|
||||
ti->cpu = cpu;
|
||||
ti->preempt_count = 0;
|
||||
ti->local_flags = 0;
|
||||
ti->flags = 0;
|
||||
klp_init_thread_info(ti);
|
||||
}
|
||||
|
||||
/*
|
||||
* Stack space used when we detect a bad kernel stack pointer, and
|
||||
* early in SMP boots before relocation is enabled. Exclusive emergency
|
||||
@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
|
||||
* Since we use these as temporary stacks during secondary CPU
|
||||
* bringup, we need to get at them in real mode. This means they
|
||||
* must also be within the RMO region.
|
||||
*
|
||||
* The IRQ stacks allocated elsewhere in this file are zeroed and
|
||||
* initialized in kernel/irq.c. These are initialized here in order
|
||||
* to have emergency stacks available as early as possible.
|
||||
*/
|
||||
limit = min(safe_stack_limit(), ppc64_rma_size);
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
struct thread_info *ti;
|
||||
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
|
||||
klp_init_thread_info(ti);
|
||||
memset(ti, 0, THREAD_SIZE);
|
||||
emerg_stack_init_thread_info(ti, i);
|
||||
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/* emergency stack for NMI exception handling. */
|
||||
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
|
||||
klp_init_thread_info(ti);
|
||||
memset(ti, 0, THREAD_SIZE);
|
||||
emerg_stack_init_thread_info(ti, i);
|
||||
paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
|
||||
|
||||
/* emergency stack for machine check exception handling. */
|
||||
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
|
||||
klp_init_thread_info(ti);
|
||||
memset(ti, 0, THREAD_SIZE);
|
||||
emerg_stack_init_thread_info(ti, i);
|
||||
paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
|
||||
#endif
|
||||
}
|
||||
|
@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
|
||||
stdu r1,-SWITCH_FRAME_SIZE(r1)
|
||||
|
||||
/* Save all gprs to pt_regs */
|
||||
SAVE_8GPRS(0,r1)
|
||||
SAVE_8GPRS(8,r1)
|
||||
SAVE_8GPRS(16,r1)
|
||||
SAVE_8GPRS(24,r1)
|
||||
SAVE_GPR(0, r1)
|
||||
SAVE_10GPRS(2, r1)
|
||||
SAVE_10GPRS(12, r1)
|
||||
SAVE_10GPRS(22, r1)
|
||||
|
||||
/* Save previous stack pointer (r1) */
|
||||
addi r8, r1, SWITCH_FRAME_SIZE
|
||||
std r8, GPR1(r1)
|
||||
|
||||
/* Load special regs for save below */
|
||||
mfmsr r8
|
||||
@ -95,18 +99,44 @@ ftrace_call:
|
||||
bl ftrace_stub
|
||||
nop
|
||||
|
||||
/* Load ctr with the possibly modified NIP */
|
||||
ld r3, _NIP(r1)
|
||||
mtctr r3
|
||||
/* Load the possibly modified NIP */
|
||||
ld r15, _NIP(r1)
|
||||
|
||||
#ifdef CONFIG_LIVEPATCH
|
||||
cmpd r14,r3 /* has NIP been altered? */
|
||||
cmpd r14, r15 /* has NIP been altered? */
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
|
||||
/* NIP has not been altered, skip over further checks */
|
||||
beq 1f
|
||||
|
||||
/* Check if there is an active kprobe on us */
|
||||
subi r3, r14, 4
|
||||
bl is_current_kprobe_addr
|
||||
nop
|
||||
|
||||
/*
|
||||
* If r3 == 1, then this is a kprobe/jprobe.
|
||||
* else, this is a livepatched function.
|
||||
*
|
||||
* The conditional branch for livepatch_handler below will use the
|
||||
* result of this comparison. For kprobe/jprobe, we just need to branch to
|
||||
* the new NIP, not call livepatch_handler. The branch below is bne, so we
|
||||
* want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
|
||||
* CR0[EQ] = (r3 == 1).
|
||||
*/
|
||||
cmpdi r3, 1
|
||||
1:
|
||||
#endif
|
||||
|
||||
/* Load CTR with the possibly modified NIP */
|
||||
mtctr r15
|
||||
|
||||
/* Restore gprs */
|
||||
REST_8GPRS(0,r1)
|
||||
REST_8GPRS(8,r1)
|
||||
REST_8GPRS(16,r1)
|
||||
REST_8GPRS(24,r1)
|
||||
REST_GPR(0,r1)
|
||||
REST_10GPRS(2,r1)
|
||||
REST_10GPRS(12,r1)
|
||||
REST_10GPRS(22,r1)
|
||||
|
||||
/* Restore possibly modified LR */
|
||||
ld r0, _LINK(r1)
|
||||
@ -119,7 +149,10 @@ ftrace_call:
|
||||
addi r1, r1, SWITCH_FRAME_SIZE
|
||||
|
||||
#ifdef CONFIG_LIVEPATCH
|
||||
/* Based on the cmpd above, if the NIP was altered handle livepatch */
|
||||
/*
|
||||
* Based on the cmpd or cmpdi above, if the NIP was altered and we're
|
||||
* not on a kprobe/jprobe, then handle livepatch.
|
||||
*/
|
||||
bne- livepatch_handler
|
||||
#endif
|
||||
|
||||
|
@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
|
||||
struct pt_regs *regs_user_copy)
|
||||
{
|
||||
regs_user->regs = task_pt_regs(current);
|
||||
regs_user->abi = perf_reg_abi(current);
|
||||
regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
|
||||
PERF_SAMPLE_REGS_ABI_NONE;
|
||||
}
|
||||
|
@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
|
||||
return mmio_atsd_reg;
|
||||
}
|
||||
|
||||
static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
|
||||
static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
|
||||
{
|
||||
unsigned long launch;
|
||||
|
||||
@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
|
||||
/* PID */
|
||||
launch |= pid << PPC_BITLSHIFT(38);
|
||||
|
||||
/* No flush */
|
||||
launch |= !flush << PPC_BITLSHIFT(39);
|
||||
|
||||
/* Invalidating the entire process doesn't use a va */
|
||||
return mmio_launch_invalidate(npu, launch, 0);
|
||||
}
|
||||
|
||||
static int mmio_invalidate_va(struct npu *npu, unsigned long va,
|
||||
unsigned long pid)
|
||||
unsigned long pid, bool flush)
|
||||
{
|
||||
unsigned long launch;
|
||||
|
||||
@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
|
||||
/* PID */
|
||||
launch |= pid << PPC_BITLSHIFT(38);
|
||||
|
||||
/* No flush */
|
||||
launch |= !flush << PPC_BITLSHIFT(39);
|
||||
|
||||
return mmio_launch_invalidate(npu, launch, va);
|
||||
}
|
||||
|
||||
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
|
||||
|
||||
struct mmio_atsd_reg {
|
||||
struct npu *npu;
|
||||
int reg;
|
||||
};
|
||||
|
||||
static void mmio_invalidate_wait(
|
||||
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
|
||||
{
|
||||
struct npu *npu;
|
||||
int i, reg;
|
||||
|
||||
/* Wait for all invalidations to complete */
|
||||
for (i = 0; i <= max_npu2_index; i++) {
|
||||
if (mmio_atsd_reg[i].reg < 0)
|
||||
continue;
|
||||
|
||||
/* Wait for completion */
|
||||
npu = mmio_atsd_reg[i].npu;
|
||||
reg = mmio_atsd_reg[i].reg;
|
||||
while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
|
||||
cpu_relax();
|
||||
|
||||
put_mmio_atsd_reg(npu, reg);
|
||||
|
||||
/*
|
||||
* The GPU requires two flush ATSDs to ensure all entries have
|
||||
* been flushed. We use PID 0 as it will never be used for a
|
||||
* process on the GPU.
|
||||
*/
|
||||
if (flush)
|
||||
mmio_invalidate_pid(npu, 0, true);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate either a single address or an entire PID depending on
|
||||
* the value of va.
|
||||
*/
|
||||
static void mmio_invalidate(struct npu_context *npu_context, int va,
|
||||
unsigned long address)
|
||||
unsigned long address, bool flush)
|
||||
{
|
||||
int i, j, reg;
|
||||
int i, j;
|
||||
struct npu *npu;
|
||||
struct pnv_phb *nphb;
|
||||
struct pci_dev *npdev;
|
||||
struct {
|
||||
struct npu *npu;
|
||||
int reg;
|
||||
} mmio_atsd_reg[NV_MAX_NPUS];
|
||||
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
|
||||
unsigned long pid = npu_context->mm->context.id;
|
||||
|
||||
/*
|
||||
@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
|
||||
|
||||
if (va)
|
||||
mmio_atsd_reg[i].reg =
|
||||
mmio_invalidate_va(npu, address, pid);
|
||||
mmio_invalidate_va(npu, address, pid,
|
||||
flush);
|
||||
else
|
||||
mmio_atsd_reg[i].reg =
|
||||
mmio_invalidate_pid(npu, pid);
|
||||
mmio_invalidate_pid(npu, pid, flush);
|
||||
|
||||
/*
|
||||
* The NPU hardware forwards the shootdown to all GPUs
|
||||
@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
|
||||
*/
|
||||
flush_tlb_mm(npu_context->mm);
|
||||
|
||||
/* Wait for all invalidations to complete */
|
||||
for (i = 0; i <= max_npu2_index; i++) {
|
||||
if (mmio_atsd_reg[i].reg < 0)
|
||||
continue;
|
||||
|
||||
/* Wait for completion */
|
||||
npu = mmio_atsd_reg[i].npu;
|
||||
reg = mmio_atsd_reg[i].reg;
|
||||
while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
|
||||
cpu_relax();
|
||||
put_mmio_atsd_reg(npu, reg);
|
||||
}
|
||||
mmio_invalidate_wait(mmio_atsd_reg, flush);
|
||||
if (flush)
|
||||
/* Wait for the flush to complete */
|
||||
mmio_invalidate_wait(mmio_atsd_reg, false);
|
||||
}
|
||||
|
||||
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
|
||||
@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
|
||||
* There should be no more translation requests for this PID, but we
|
||||
* need to ensure any entries for it are removed from the TLB.
|
||||
*/
|
||||
mmio_invalidate(npu_context, 0, 0);
|
||||
mmio_invalidate(npu_context, 0, 0, true);
|
||||
}
|
||||
|
||||
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
|
||||
@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
|
||||
{
|
||||
struct npu_context *npu_context = mn_to_npu_context(mn);
|
||||
|
||||
mmio_invalidate(npu_context, 1, address);
|
||||
mmio_invalidate(npu_context, 1, address, true);
|
||||
}
|
||||
|
||||
static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
|
||||
@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
|
||||
{
|
||||
struct npu_context *npu_context = mn_to_npu_context(mn);
|
||||
|
||||
mmio_invalidate(npu_context, 1, address);
|
||||
mmio_invalidate(npu_context, 1, address, true);
|
||||
}
|
||||
|
||||
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
|
||||
@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
|
||||
struct npu_context *npu_context = mn_to_npu_context(mn);
|
||||
unsigned long address;
|
||||
|
||||
for (address = start; address <= end; address += PAGE_SIZE)
|
||||
mmio_invalidate(npu_context, 1, address);
|
||||
for (address = start; address < end; address += PAGE_SIZE)
|
||||
mmio_invalidate(npu_context, 1, address, false);
|
||||
|
||||
/* Do the flush only on the final address == end */
|
||||
mmio_invalidate(npu_context, 1, address, true);
|
||||
}
|
||||
|
||||
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
|
||||
@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
|
||||
/* No nvlink associated with this GPU device */
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
if (!mm) {
|
||||
/* kernel thread contexts are not supported */
|
||||
if (!mm || mm->context.id == 0) {
|
||||
/*
|
||||
* Kernel thread contexts are not supported and context id 0 is
|
||||
* reserved on the GPU.
|
||||
*/
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user