From 69111bac42f5ceacdd22e30947837ceb2c4493ed Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 21 Oct 2014 15:23:25 -0500 Subject: [PATCH 001/101] powerpc: Replace __get_cpu_var uses This still has not been merged and now powerpc is the only arch that does not have this change. Sorry about missing linuxppc-dev before. V2->V2 - Fix up to work against 3.18-rc1 __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: Benjamin Herrenschmidt CC: Paul Mackerras Signed-off-by: Christoph Lameter [mpe: Fix build errors caused by set/or_softirq_pending(), and rework assignment in __set_breakpoint() to use memcpy().] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hardirq.h | 7 +++++- arch/powerpc/include/asm/tlbflush.h | 4 ++-- arch/powerpc/include/asm/xics.h | 8 +++---- arch/powerpc/kernel/dbell.c | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 6 ++--- arch/powerpc/kernel/iommu.c | 2 +- arch/powerpc/kernel/irq.c | 4 ++-- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 6 ++--- arch/powerpc/kernel/mce.c | 24 +++++++++---------- arch/powerpc/kernel/process.c | 10 ++++---- arch/powerpc/kernel/smp.c | 6 ++--- arch/powerpc/kernel/sysfs.c | 4 ++-- arch/powerpc/kernel/time.c | 22 ++++++++--------- arch/powerpc/kernel/traps.c | 8 +++---- arch/powerpc/kvm/e500.c | 14 +++++------ arch/powerpc/kvm/e500mc.c | 4 ++-- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/hugetlbpage-book3e.c | 6 ++--- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/perf/core-book3s.c | 22 ++++++++--------- arch/powerpc/perf/core-fsl-emb.c | 6 ++--- arch/powerpc/platforms/cell/interrupt.c | 6 ++--- .../platforms/powernv/opal-tracepoints.c | 4 ++-- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/pseries/dtl.c | 2 +- arch/powerpc/platforms/pseries/hvCall_inst.c | 4 ++-- arch/powerpc/platforms/pseries/iommu.c | 8 +++---- arch/powerpc/platforms/pseries/lpar.c | 6 ++--- arch/powerpc/platforms/pseries/ras.c | 4 ++-- arch/powerpc/sysdev/xics/xics-common.c | 2 +- 32 files changed, 108 insertions(+), 103 deletions(-) diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 1bbb3013d6aa..8add8b861e8d 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -21,7 +21,12 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT -#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending +#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x)) static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 2def01ed0cb2..cd7c2719d3ef 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -107,14 +107,14 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->index) __flush_tlb_pending(batch); diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0d050ea37a04..6997f4a271df 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -98,7 +98,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr); static inline void xics_push_cppr(unsigned int vec) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) return; @@ -111,7 +111,7 @@ static inline void xics_push_cppr(unsigned int vec) static inline unsigned char xics_pop_cppr(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index < 1)) return LOWEST_PRIORITY; @@ -121,7 +121,7 @@ static inline unsigned char xics_pop_cppr(void) static inline void xics_set_base_cppr(unsigned char cppr) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* we only really want to set the priority when there's * just one cppr value on the stack @@ -133,7 +133,7 @@ static inline void xics_set_base_cppr(unsigned char cppr) static inline unsigned char xics_cppr_top(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); return os_cppr->stack[os_cppr->index]; } diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index d55c76c571f3..f4217819cc31 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -41,7 +41,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); - __get_cpu_var(irq_stat).doorbell_irqs++; + __this_cpu_inc(irq_stat.doorbell_irqs); smp_ipi_demux(); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1f7d84e2e8b2..05e804cdecaa 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -63,7 +63,7 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); *slot = bp; @@ -88,7 +88,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); if (*slot != bp) { WARN_ONCE(1, "Can't find the breakpoint"); @@ -226,7 +226,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __get_cpu_var(bp_per_reg); + bp = __this_cpu_read(bp_per_reg); if (!bp) goto out; info = counter_arch_bp(bp); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a10642a0d861..71e60bfb89e2 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev, * We don't need to disable preemption here because any CPU can * safely use any IOMMU pool. */ - pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); + pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); if (largealloc) pool = &(tbl->large_pool); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c14383575fe8..8d87bb162ecd 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -114,7 +114,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); return now >= *next_tb; } @@ -499,7 +499,7 @@ void __do_irq(struct pt_regs *regs) /* And finally process it */ if (unlikely(irq == NO_IRQ)) - __get_cpu_var(irq_stat).spurious_irqs++; + __this_cpu_inc(irq_stat.spurious_irqs); else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 8504657379f1..e77c3ccf8dcf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -155,7 +155,7 @@ static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; struct thread_info *backup_current_thread_info = - &__get_cpu_var(kgdb_thread_info); + this_cpu_ptr(&kgdb_thread_info); if (user_mode(regs)) return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2f72af82513c..7c053f281406 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -119,7 +119,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } @@ -127,7 +127,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_msr = regs->msr; } @@ -192,7 +192,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a7fd4cb78b78..15c99b649b04 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,8 +73,8 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __get_cpu_var(mce_nest_count)++; - struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); + int index = __this_cpu_inc_return(mce_nest_count); + struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* * Return if we don't have enough space to log mce event. @@ -143,7 +143,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __get_cpu_var(mce_nest_count) - 1; + int index = __this_cpu_read(mce_nest_count) - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -153,7 +153,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. */ if (index < MAX_MC_EVT) { - mc_evt = &__get_cpu_var(mce_event[index]); + mc_evt = this_cpu_ptr(&mce_event[index]); /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -163,7 +163,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. */ if (release) - __get_cpu_var(mce_nest_count)--; + __this_cpu_dec(mce_nest_count); return ret; } @@ -184,13 +184,13 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __get_cpu_var(mce_queue_count)++; + index = __this_cpu_inc_return(mce_queue_count); /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __get_cpu_var(mce_queue_count)--; + __this_cpu_dec(mce_queue_count); return; } - __get_cpu_var(mce_event_queue[index]) = evt; + memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -208,11 +208,11 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. */ - while (__get_cpu_var(mce_queue_count) > 0) { - index = __get_cpu_var(mce_queue_count) - 1; + while (__this_cpu_read(mce_queue_count) > 0) { + index = __this_cpu_read(mce_queue_count) - 1; machine_check_print_event_info( - &__get_cpu_var(mce_event_queue[index])); - __get_cpu_var(mce_queue_count)--; + this_cpu_ptr(&mce_event_queue[index])); + __this_cpu_dec(mce_queue_count); } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 923cd2daba89..91e132b495bf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -499,7 +499,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) void __set_breakpoint(struct arch_hw_breakpoint *brk) { - __get_cpu_var(current_brk) = *brk; + memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); if (cpu_has_feature(CPU_FTR_DAWR)) set_dawr(brk); @@ -842,7 +842,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) + if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) __set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -856,7 +856,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Collect processor utilization data per process */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; cu->current_tb = current_tb = mfspr(SPRN_PURR); @@ -866,7 +866,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_BOOK3S_64 - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; if (batch->index) @@ -889,7 +889,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 71e186d5f331..8b2d2dc8ef10 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -243,7 +243,7 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = &__get_cpu_var(ipi_message); + struct cpu_messages *info = this_cpu_ptr(&ipi_message); unsigned int all; mb(); /* order any irq clear */ @@ -442,9 +442,9 @@ void generic_mach_cpu_die(void) idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); - __get_cpu_var(cpu_state) = CPU_DEAD; + __this_cpu_write(cpu_state, CPU_DEAD); smp_wmb(); - while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE) cpu_relax(); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 67fd2fd2620a..fa1fd8a0c867 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -394,10 +394,10 @@ void ppc_enable_pmcs(void) ppc_set_pmu_inuse(1); /* Only need to enable them once */ - if (__get_cpu_var(pmcs_enabled)) + if (__this_cpu_read(pmcs_enabled)) return; - __get_cpu_var(pmcs_enabled) = 1; + __this_cpu_write(pmcs_enabled, 1); if (ppc_md.enable_pmcs) ppc_md.enable_pmcs(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7505599c2593..9f8ea617ff2c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,9 +458,9 @@ static inline void clear_irq_work_pending(void) DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) #endif /* 32 vs 64 bit */ @@ -482,8 +482,8 @@ void arch_irq_work_raise(void) static void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 now; trace_timer_interrupt_entry(regs); @@ -498,7 +498,7 @@ static void __timer_interrupt(void) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); - __get_cpu_var(irq_stat).timer_irqs_event++; + __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) @@ -506,13 +506,13 @@ static void __timer_interrupt(void) /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); - __get_cpu_var(irq_stat).timer_irqs_others++; + __this_cpu_inc(irq_stat.timer_irqs_others); } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } #endif @@ -527,7 +527,7 @@ static void __timer_interrupt(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -813,7 +813,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; + __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); set_dec(evt); /* We may have raced with new irq work */ @@ -833,7 +833,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, /* Interrupt handler for the timer broadcast IPI */ void tick_broadcast_ipi_handler(void) { - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); *next_tb = get_tb_or_rtc(); __timer_interrupt(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0dc43f9932cf..e6595b72269b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,7 +295,7 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); @@ -304,7 +304,7 @@ long machine_check_early(struct pt_regs *regs) long hmi_exception_realmode(struct pt_regs *regs) { - __get_cpu_var(irq_stat).hmi_exceptions++; + __this_cpu_inc(irq_stat.hmi_exceptions); if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); @@ -700,7 +700,7 @@ void machine_check_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); int recover = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU @@ -1519,7 +1519,7 @@ void vsx_unavailable_tm(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { - __get_cpu_var(irq_stat).pmu_irqs++; + __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); } diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 2e02ed849f36..16095841afe1 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -76,11 +76,11 @@ static inline int local_sid_setup_one(struct id *entry) unsigned long sid; int ret = -1; - sid = ++(__get_cpu_var(pcpu_last_used_sid)); + sid = __this_cpu_inc_return(pcpu_last_used_sid); if (sid < NUM_TIDS) { - __get_cpu_var(pcpu_sids).entry[sid] = entry; + __this_cpu_write(pcpu_sids)entry[sid], entry); entry->val = sid; - entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); ret = sid; } @@ -108,8 +108,8 @@ static inline int local_sid_setup_one(struct id *entry) static inline int local_sid_lookup(struct id *entry) { if (entry && entry->val != 0 && - __get_cpu_var(pcpu_sids).entry[entry->val] == entry && - entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + __this_cpu_read(pcpu_sids.entry[entry->val]) == entry && + entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val])) return entry->val; return -1; } @@ -117,8 +117,8 @@ static inline int local_sid_lookup(struct id *entry) /* Invalidate all id mappings on local core -- call with preempt disabled */ static inline void local_sid_destroy_all(void) { - __get_cpu_var(pcpu_last_used_sid) = 0; - memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); + __this_cpu_write(pcpu_last_used_sid, 0); + memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(pcpu_sids)); } static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 2fdc8722e324..cda695de8aa7 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -144,9 +144,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) { + __this_cpu_read(last_vcpu_of_lpid[get_lpid(vcpu)]) != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu; + __this_cpu_write(last_vcpu_of_lpid[get_lpid(vcpu)], vcpu); } } diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ae4962a06476..d53288a08c37 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -629,7 +629,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index d5339a3b9945..f01027731e23 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1322,7 +1322,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - &__get_cpu_var(ppc64_tlb_batch); + this_cpu_ptr(&ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 5e4ee2573903..ba47aaf33a4b 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = __get_cpu_var(next_tlbcam_idx); + index = this_cpu_read(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + __this_cpu_write(next_tlbcam_idx, tlbcam_index); else - __get_cpu_var(next_tlbcam_idx)++; + __this_cpu_inc(next_tlbcam_idx); return index; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..8aa04f03fd31 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -462,7 +462,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = &get_cpu_var(hugepd_freelist_cur); + batchp = this_cpu_ptr(&hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a6995d4e93d4..7c4f6690533a 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -339,7 +339,7 @@ static void power_pmu_bhrb_reset(void) static void power_pmu_bhrb_enable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -354,7 +354,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) static void power_pmu_bhrb_disable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -1144,7 +1144,7 @@ static void power_pmu_disable(struct pmu *pmu) if (!ppmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { /* @@ -1211,7 +1211,7 @@ static void power_pmu_enable(struct pmu *pmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -1403,7 +1403,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * Add the event to the list (if there is room) * and check whether the total set is still feasible. */ - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n0 = cpuhw->n_events; if (n0 >= ppmu->n_counter) goto out; @@ -1469,7 +1469,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) power_pmu_read(event); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); for (i = 0; i < cpuhw->n_events; ++i) { if (event == cpuhw->event[i]) { while (++i < cpuhw->n_events) { @@ -1575,7 +1575,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) */ static void power_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1589,7 +1589,7 @@ static void power_pmu_start_txn(struct pmu *pmu) */ static void power_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1607,7 +1607,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1964,7 +1964,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(cpuhw); data.br_stack = &cpuhw->bhrb_stack; } @@ -2037,7 +2037,7 @@ static bool pmc_overflow(unsigned long val) static void perf_event_interrupt(struct pt_regs *regs) { int i, j; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val[8]; int found, active; diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index d35ae52c69dc..4acaea01fe03 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -210,7 +210,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { cpuhw->disabled = 1; @@ -249,7 +249,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -653,7 +653,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, static void perf_event_interrupt(struct pt_regs *regs) { int i; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val; int found = 0; diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 8a106b4172e0..4c11421847be 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -82,7 +82,7 @@ static void iic_unmask(struct irq_data *d) static void iic_eoi(struct irq_data *d) { - struct iic *iic = &__get_cpu_var(cpu_iic); + struct iic *iic = this_cpu_ptr(&cpu_iic); out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); BUG_ON(iic->eoi_ptr < 0); } @@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void) struct iic *iic; unsigned int virq; - iic = &__get_cpu_var(cpu_iic); + iic = this_cpu_ptr(&cpu_iic); *(unsigned long *) &pending = in_be64((u64 __iomem *) &iic->regs->pending_destr); if (!(pending.flags & CBE_IIC_IRQ_VALID)) @@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void) void iic_setup_cpu(void) { - out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff); + out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff); } u8 iic_get_target_id(int cpu) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index ae14c40b4b1c..e11273b2386d 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; @@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 5f3b23220b8e..a6c42f34303a 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -711,7 +711,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) static unsigned int ps3_get_irq(void) { - struct ps3_private *pd = &__get_cpu_var(ps3_private); + struct ps3_private *pd = this_cpu_ptr(&ps3_private); u64 x = (pd->bmp.status & pd->bmp.mask); unsigned int plug; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 1062f71f5a85..39049e4884fb 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -75,7 +75,7 @@ static atomic_t dtl_count; */ static void consume_dtle(struct dtl_entry *dtle, u64 index) { - struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); + struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); struct dtl_entry *wp = dtlr->write_ptr; struct lppaca *vpa = local_paca->lppaca_ptr; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 4575f0c9e521..f02ec3ab428c 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -110,7 +110,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->tb_start = mftb(); h->purr_start = mfspr(SPRN_PURR); } @@ -123,7 +123,7 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->num_calls++; h->tb_total += mftb() - h->tb_start; h->purr_total += mfspr(SPRN_PURR) - h->purr_start; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e32e00976a94..49bcf7c09c49 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -199,7 +199,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, local_irq_save(flags); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -212,7 +212,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } rpn = __pa(uaddr) >> TCE_SHIFT; @@ -398,7 +398,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, long l, limit; local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); @@ -406,7 +406,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, local_irq_enable(); return -ENOMEM; } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 8c509d5397c6..87d8792f906c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -505,7 +505,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long hash, index, shift, hidx, slot; @@ -695,7 +695,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; @@ -720,7 +720,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval, local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 5a4d0fc03b03..c3b2a7e81ddb 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -302,8 +302,8 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; if (!rtas_error_extended(h)) { - memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); + memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); + errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); } else { int len, error_log_length; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index fe0cca477164..365249cd346b 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -155,7 +155,7 @@ int __init xics_smp_probe(void) void xics_teardown_cpu(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* * we have to reset the cppr index to 0 because we're From 9178ba294b6839eeff1a91bed95515d783f3ee6c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Oct 2014 16:01:09 +0200 Subject: [PATCH 002/101] powerpc: Convert power off logic to pm_power_off The generic Linux framework to power off the machine is a function pointer called pm_power_off. The trick about this pointer is that device drivers can potentially implement it rather than board files. Today on powerpc we set pm_power_off to invoke our generic full machine power off logic which then calls ppc_md.power_off to invoke machine specific power off. However, when we want to add a power off GPIO via the "gpio-poweroff" driver, this card house falls apart. That driver only registers itself if pm_power_off is NULL to ensure it doesn't override board specific logic. However, since we always set pm_power_off to the generic power off logic (which will just not power off the machine if no ppc_md.power_off call is implemented), we can't implement power off via the generic GPIO power off driver. To fix this up, let's get rid of the ppc_md.power_off logic and just always use pm_power_off as was intended. Then individual drivers such as the GPIO power off driver can implement power off logic via that function pointer. With this patch set applied and a few patches on top of QEMU that implement a power off GPIO on the virt e500 machine, I can successfully turn off my virtual machine after halt. Signed-off-by: Alexander Graf [mpe: Squash into one patch and update changelog based on cover letter] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 1 - arch/powerpc/kernel/setup-common.c | 6 +- arch/powerpc/platforms/44x/ppc476.c | 2 +- arch/powerpc/platforms/52xx/efika.c | 3 +- .../powerpc/platforms/83xx/mcu_mpc8349emitx.c | 8 +-- arch/powerpc/platforms/85xx/corenet_generic.c | 2 +- arch/powerpc/platforms/85xx/sgy_cts1000.c | 4 +- arch/powerpc/platforms/cell/celleb_setup.c | 4 +- arch/powerpc/platforms/cell/qpace_setup.c | 2 +- arch/powerpc/platforms/cell/setup.c | 2 +- arch/powerpc/platforms/chrp/setup.c | 3 +- arch/powerpc/platforms/embedded6xx/gamecube.c | 3 +- .../platforms/embedded6xx/linkstation.c | 4 +- arch/powerpc/platforms/embedded6xx/wii.c | 3 +- arch/powerpc/platforms/maple/setup.c | 4 +- arch/powerpc/platforms/powermac/setup.c | 3 +- arch/powerpc/platforms/powernv/setup.c | 4 +- arch/powerpc/platforms/ps3/setup.c | 2 +- arch/powerpc/platforms/pseries/setup.c | 59 ++++++++++--------- arch/powerpc/sysdev/fsl_soc.c | 2 +- arch/powerpc/xmon/xmon.c | 3 +- 21 files changed, 66 insertions(+), 58 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 307347f8ddbd..f15f15c92544 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -142,7 +142,6 @@ struct machdep_calls { #endif void (*restart)(char *cmd); - void (*power_off)(void); void (*halt)(void); void (*panic)(char *str); void (*cpu_die)(void); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1362cd62b3fa..44c8d03558ac 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -139,8 +139,8 @@ void machine_restart(char *cmd) void machine_power_off(void) { machine_shutdown(); - if (ppc_md.power_off) - ppc_md.power_off(); + if (pm_power_off) + pm_power_off(); #ifdef CONFIG_SMP smp_send_stop(); #endif @@ -151,7 +151,7 @@ void machine_power_off(void) /* Used by the G5 thermal driver */ EXPORT_SYMBOL_GPL(machine_power_off); -void (*pm_power_off)(void) = machine_power_off; +void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off); void machine_halt(void) diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 58db9d083969..c11ce6516c8f 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -94,7 +94,7 @@ static int avr_probe(struct i2c_client *client, { avr_i2c_client = client; ppc_md.restart = avr_reset_system; - ppc_md.power_off = avr_power_off_system; + pm_power_off = avr_power_off_system; return 0; } diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 3feffde9128d..6af651e69129 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -212,6 +212,8 @@ static int __init efika_probe(void) DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; + pm_power_off = rtas_power_off; + return 1; } @@ -225,7 +227,6 @@ define_machine(efika) .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .set_rtc_time = rtas_set_rtc_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 463fa91ee5b6..15e8021ddef9 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -167,10 +167,10 @@ static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id) if (ret) goto err; - /* XXX: this is potentially racy, but there is no lock for ppc_md */ - if (!ppc_md.power_off) { + /* XXX: this is potentially racy, but there is no lock for pm_power_off */ + if (!pm_power_off) { glob_mcu = mcu; - ppc_md.power_off = mcu_power_off; + pm_power_off = mcu_power_off; dev_info(&client->dev, "will provide power-off service\n"); } @@ -197,7 +197,7 @@ static int mcu_remove(struct i2c_client *client) device_remove_file(&client->dev, &dev_attr_status); if (glob_mcu == mcu) { - ppc_md.power_off = NULL; + pm_power_off = NULL; glob_mcu = NULL; } diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index e56b89a792ed..1f309ccb096e 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -170,7 +170,7 @@ static int __init corenet_generic_probe(void) ppc_md.get_irq = ehv_pic_get_irq; ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; + pm_power_off = fsl_hv_halt; ppc_md.halt = fsl_hv_halt; #ifdef CONFIG_SMP /* diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 8162b0412117..e149c9ec26ae 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -120,7 +120,7 @@ static int gpio_halt_probe(struct platform_device *pdev) /* Register our halt function */ ppc_md.halt = gpio_halt_cb; - ppc_md.power_off = gpio_halt_cb; + pm_power_off = gpio_halt_cb; printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d" " irq).\n", gpio, trigger, irq); @@ -137,7 +137,7 @@ static int gpio_halt_remove(struct platform_device *pdev) free_irq(irq, halt_node); ppc_md.halt = NULL; - ppc_md.power_off = NULL; + pm_power_off = NULL; gpio_free(gpio); diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index 34e8ce2976aa..90be8ec51686 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -142,6 +142,7 @@ static int __init celleb_probe_beat(void) powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS | FW_FEATURE_BEAT | FW_FEATURE_LPAR; hpte_init_beat_v3(); + pm_power_off = beat_power_off; return 1; } @@ -190,6 +191,7 @@ static int __init celleb_probe_native(void) powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -204,7 +206,6 @@ define_machine(celleb_beat) { .setup_arch = celleb_setup_arch_beat, .show_cpuinfo = celleb_show_cpuinfo, .restart = beat_restart, - .power_off = beat_power_off, .halt = beat_halt, .get_rtc_time = beat_get_rtc_time, .set_rtc_time = beat_set_rtc_time, @@ -230,7 +231,6 @@ define_machine(celleb_native) { .setup_arch = celleb_setup_arch_native, .show_cpuinfo = celleb_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c index 6e3409d590ac..d328140dc6f5 100644 --- a/arch/powerpc/platforms/cell/qpace_setup.c +++ b/arch/powerpc/platforms/cell/qpace_setup.c @@ -127,6 +127,7 @@ static int __init qpace_probe(void) return 0; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -137,7 +138,6 @@ define_machine(qpace) { .setup_arch = qpace_setup_arch, .show_cpuinfo = qpace_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 6ae25fb62015..d62aa982d530 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -259,6 +259,7 @@ static int __init cell_probe(void) return 0; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -269,7 +270,6 @@ define_machine(cell) { .setup_arch = cell_setup_arch, .show_cpuinfo = cell_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 5b77b1919fd2..860a59eb8ea2 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -585,6 +585,8 @@ static int __init chrp_probe(void) DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; + pm_power_off = rtas_power_off; + return 1; } @@ -597,7 +599,6 @@ define_machine(chrp) { .show_cpuinfo = chrp_show_cpuinfo, .init_IRQ = chrp_init_IRQ, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .time_init = chrp_time_init, .set_rtc_time = chrp_set_rtc_time, diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index bd4ba5d7d568..fe0ed6ee285e 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -67,6 +67,8 @@ static int __init gamecube_probe(void) if (!of_flat_dt_is_compatible(dt_root, "nintendo,gamecube")) return 0; + pm_power_off = gamecube_power_off; + return 1; } @@ -80,7 +82,6 @@ define_machine(gamecube) { .probe = gamecube_probe, .init_early = gamecube_init_early, .restart = gamecube_restart, - .power_off = gamecube_power_off, .halt = gamecube_halt, .init_IRQ = flipper_pic_probe, .get_irq = flipper_pic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 168e1d80b2e5..540eeb58d3f0 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -147,6 +147,9 @@ static int __init linkstation_probe(void) if (!of_flat_dt_is_compatible(root, "linkstation")) return 0; + + pm_power_off = linkstation_power_off; + return 1; } @@ -158,7 +161,6 @@ define_machine(linkstation){ .show_cpuinfo = linkstation_show_cpuinfo, .get_irq = mpic_get_irq, .restart = linkstation_restart, - .power_off = linkstation_power_off, .halt = linkstation_halt, .calibrate_decr = generic_calibrate_decr, }; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 388e29bab8f6..352592d3e44e 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -211,6 +211,8 @@ static int __init wii_probe(void) if (!of_flat_dt_is_compatible(dt_root, "nintendo,wii")) return 0; + pm_power_off = wii_power_off; + return 1; } @@ -226,7 +228,6 @@ define_machine(wii) { .init_early = wii_init_early, .setup_arch = wii_setup_arch, .restart = wii_restart, - .power_off = wii_power_off, .halt = wii_halt, .init_IRQ = wii_pic_probe, .get_irq = flipper_pic_get_irq, diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index cb1b0b35a0c6..56b85cd61aaf 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -169,7 +169,7 @@ static void __init maple_use_rtas_reboot_and_halt_if_present(void) if (rtas_service_present("system-reboot") && rtas_service_present("power-off")) { ppc_md.restart = rtas_restart; - ppc_md.power_off = rtas_power_off; + pm_power_off = rtas_power_off; ppc_md.halt = rtas_halt; } } @@ -312,6 +312,7 @@ static int __init maple_probe(void) alloc_dart_table(); hpte_init_native(); + pm_power_off = maple_power_off; return 1; } @@ -325,7 +326,6 @@ define_machine(maple) { .pci_irq_fixup = maple_pci_irq_fixup, .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, .restart = maple_restart, - .power_off = maple_power_off, .halt = maple_halt, .get_boot_time = maple_get_boot_time, .set_rtc_time = maple_set_rtc_time, diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index b127a29ac526..713d36d45d1d 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -632,6 +632,8 @@ static int __init pmac_probe(void) smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL); #endif /* CONFIG_PMAC_SMU */ + pm_power_off = pmac_power_off; + return 1; } @@ -663,7 +665,6 @@ define_machine(powermac) { .get_irq = NULL, /* changed later */ .pci_irq_fixup = pmac_pci_irq_fixup, .restart = pmac_restart, - .power_off = pmac_power_off, .halt = pmac_halt, .time_init = pmac_time_init, .get_boot_time = pmac_get_boot_time, diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 3f9546d8a51f..941831d67cb2 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -268,7 +268,7 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.get_rtc_time = opal_get_rtc_time; ppc_md.set_rtc_time = opal_set_rtc_time; ppc_md.restart = pnv_restart; - ppc_md.power_off = pnv_power_off; + pm_power_off = pnv_power_off; ppc_md.halt = pnv_halt; ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; @@ -285,7 +285,7 @@ static void __init pnv_setup_machdep_rtas(void) ppc_md.set_rtc_time = rtas_set_rtc_time; } ppc_md.restart = rtas_restart; - ppc_md.power_off = rtas_power_off; + pm_power_off = rtas_power_off; ppc_md.halt = rtas_halt; } #endif /* CONFIG_PPC_POWERNV_RTAS */ diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 3f509f86432c..009a2004b876 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -248,6 +248,7 @@ static int __init ps3_probe(void) ps3_mm_init(); ps3_mm_vas_create(&htab_size); ps3_hpte_init(htab_size); + pm_power_off = ps3_power_off; DBG(" <- %s:%d\n", __func__, __LINE__); return 1; @@ -278,7 +279,6 @@ define_machine(ps3) { .calibrate_decr = ps3_calibrate_decr, .progress = ps3_progress, .restart = ps3_restart, - .power_off = ps3_power_off, .halt = ps3_halt, #if defined(CONFIG_KEXEC) .kexec_cpu_down = ps3_kexec_cpu_down, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 125c589eeef5..db0fc0c07568 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -659,6 +659,34 @@ static void __init pSeries_init_early(void) pr_debug(" <- pSeries_init_early()\n"); } +/** + * pseries_power_off - tell firmware about how to power off the system. + * + * This function calls either the power-off rtas token in normal cases + * or the ibm,power-off-ups token (if present & requested) in case of + * a power failure. If power-off token is used, power on will only be + * possible with power button press. If ibm,power-off-ups token is used + * it will allow auto poweron after power is restored. + */ +static void pseries_power_off(void) +{ + int rc; + int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups"); + + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_POWER_OFF); + + if (rtas_poweron_auto == 0 || + rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { + rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1); + printk(KERN_INFO "RTAS power-off returned %d\n", rc); + } else { + rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); + printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc); + } + for (;;); +} + /* * Called very early, MMU is off, device-tree isn't unflattened */ @@ -741,6 +769,8 @@ static int __init pSeries_probe(void) else hpte_init_native(); + pm_power_off = pseries_power_off; + pr_debug("Machine is%s LPAR !\n", (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); @@ -754,34 +784,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) return PCI_PROBE_NORMAL; } -/** - * pSeries_power_off - tell firmware about how to power off the system. - * - * This function calls either the power-off rtas token in normal cases - * or the ibm,power-off-ups token (if present & requested) in case of - * a power failure. If power-off token is used, power on will only be - * possible with power button press. If ibm,power-off-ups token is used - * it will allow auto poweron after power is restored. - */ -static void pSeries_power_off(void) -{ - int rc; - int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups"); - - if (rtas_flash_term_hook) - rtas_flash_term_hook(SYS_POWER_OFF); - - if (rtas_poweron_auto == 0 || - rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { - rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1); - printk(KERN_INFO "RTAS power-off returned %d\n", rc); - } else { - rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); - printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc); - } - for (;;); -} - #ifndef CONFIG_PCI void pSeries_final_fixup(void) { } #endif @@ -796,7 +798,6 @@ define_machine(pseries) { .pcibios_fixup = pSeries_final_fixup, .pci_probe_mode = pSeries_pci_probe_mode, .restart = rtas_restart, - .power_off = pSeries_power_off, .halt = rtas_halt, .panic = rtas_os_term, .get_boot_time = rtas_get_boot_time, diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index ffd1169ebaab..1e04568da3b9 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -238,7 +238,7 @@ void fsl_hv_restart(char *cmd) /* * Halt the current partition * - * This function should be assigned to the ppc_md.power_off and ppc_md.halt + * This function should be assigned to the pm_power_off and ppc_md.halt * function pointers, to shut down the partition when we're running under * the Freescale hypervisor. */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index b988b5addf86..506d25681fe3 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -981,7 +981,8 @@ static void bootcmds(void) else if (cmd == 'h') ppc_md.halt(); else if (cmd == 'p') - ppc_md.power_off(); + if (pm_power_off) + pm_power_off(); } static int cpu_cmd(void) From 7b051f665c32db3339ac9b6d9806ef508d09647f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 13 Oct 2014 20:27:15 +1100 Subject: [PATCH 003/101] powerpc: Use probe_kernel_address in show_instructions We really don't want to take a pagefault in show_instructions, so use probe_kernel_address instead of __get_user. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 91e132b495bf..8c2691e445bd 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -37,9 +37,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -921,12 +921,8 @@ static void show_instructions(struct pt_regs *regs) pc = (unsigned long)phys_to_virt(pc); #endif - /* We use __get_user here *only* to avoid an OOPS on a - * bad address because the pc *should* only be a - * kernel address. - */ if (!__kernel_text_address(pc) || - __get_user(instr, (unsigned int __user *)pc)) { + probe_kernel_address((unsigned int __user *)pc, instr)) { printk(KERN_CONT "XXXXXXXX "); } else { if (regs->nip == pc) From adb7cd732292b06ee964d9f6090b17c70bd8bd3d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 14 Oct 2014 11:40:26 +1100 Subject: [PATCH 004/101] powerpc/pci: Quieten unset I/O resource warning Newer POWER designs do not implement PCI I/O space, so we expect to see a number of these. Reduce the severity of the warning so it doesn't mask other real issues. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e5dad9a9edc0..bc2dab52a991 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1464,7 +1464,7 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, res = &hose->io_resource; if (!res->flags) { - printk(KERN_WARNING "PCI: I/O resource not set for host" + pr_info("PCI: I/O resource not set for host" " bridge %s (domain %d)\n", hose->dn->full_name, hose->global_number); } else { From 64ff91ff85b56321e65b476e335955af9bed2c66 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 14 Oct 2014 12:24:35 +1100 Subject: [PATCH 005/101] powerpc: Remove ppc64_boot_msg ppc64_boot_msg is meant to be a boot debug aid, but is only used in one spot. Get rid of it, and save ourseleves a couple of lines in the kernel log buffer. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 10 ---------- arch/powerpc/kernel/setup_64.c | 29 ----------------------------- 2 files changed, 39 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index f15f15c92544..4a6511f94258 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -342,16 +342,6 @@ extern sys_ctrler_t sys_ctrler; #endif /* CONFIG_PPC_PMAC */ - -/* Functions to produce codes on the leds. - * The SRC code should be unique for the message category and should - * be limited to the lower 24 bits (the upper 8 are set by these funcs), - * and (for boot & dump) should be sorted numerically in the order - * the events occur. - */ -/* Print a boot progress message. */ -void ppc64_boot_msg(unsigned int src, const char *msg); - static inline void log_error(char *buf, unsigned int err_type, int fatal) { if (ppc_md.log_error) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4f3cfe1b6a33..615aa904b216 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -665,8 +665,6 @@ static void __init emergency_stack_init(void) */ void __init setup_arch(char **cmdline_p) { - ppc64_boot_msg(0x12, "Setup Arch"); - *cmdline_p = boot_command_line; /* @@ -711,33 +709,6 @@ void __init setup_arch(char **cmdline_p) if ((unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", (unsigned long)_stext); - - ppc64_boot_msg(0x15, "Setup Done"); -} - - -/* ToDo: do something useful if ppc_md is not yet setup. */ -#define PPC64_LINUX_FUNCTION 0x0f000000 -#define PPC64_IPL_MESSAGE 0xc0000000 -#define PPC64_TERM_MESSAGE 0xb0000000 - -static void ppc64_do_msg(unsigned int src, const char *msg) -{ - if (ppc_md.progress) { - char buf[128]; - - sprintf(buf, "%08X\n", src); - ppc_md.progress(buf, 0); - snprintf(buf, 128, "%s", msg); - ppc_md.progress(buf, 0); - } -} - -/* Print a boot progress message. */ -void ppc64_boot_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); - printk("[boot]%04x %s\n", src, msg); } #ifdef CONFIG_SMP From 16d0f5c4af76b0c3424290937bf1ac22adf439b1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 14 Oct 2014 22:17:47 +1100 Subject: [PATCH 006/101] powerpc: Remove ppc_md.remove_memory We have an extra level of indirection on memory hot remove which is not matched on memory hot add. Memory hotplug is book3s only, so there is no need for it. This also enables means remove_memory() (ie memory hot unplug) works on powernv. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 4 ---- arch/powerpc/mm/mem.c | 14 +++++++++++-- .../platforms/pseries/hotplug-memory.c | 21 ------------------- 3 files changed, 12 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 4a6511f94258..15c9150a58cc 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -291,10 +291,6 @@ struct machdep_calls { #ifdef CONFIG_ARCH_RANDOM int (*get_random_long)(unsigned long *v); #endif - -#ifdef CONFIG_MEMORY_HOTREMOVE - int (*remove_memory)(u64, u64); -#endif }; extern void e500_idle(void); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8ebaac75c940..2add0b7b9f6d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -144,8 +145,17 @@ int arch_remove_memory(u64 start, u64 size) zone = page_zone(pfn_to_page(start_pfn)); ret = __remove_pages(zone, start_pfn, nr_pages); - if (!ret && (ppc_md.remove_memory)) - ret = ppc_md.remove_memory(start, size); + if (ret) + return ret; + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); + ret = remove_section_mapping(start, start + size); + + /* Ensure all vmalloc mappings are flushed in case they also + * hit that section of memory + */ + vm_unmap_aliases(); return ret; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 3c4c0dcd90d3..3cb256c2138e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -66,22 +65,6 @@ unsigned long pseries_memory_block_size(void) } #ifdef CONFIG_MEMORY_HOTREMOVE -static int pseries_remove_memory(u64 start, u64 size) -{ - int ret; - - /* Remove htab bolted mappings for this section of memory */ - start = (unsigned long)__va(start); - ret = remove_section_mapping(start, start + size); - - /* Ensure all vmalloc mappings are flushed in case they also - * hit that section of memory - */ - vm_unmap_aliases(); - - return ret; -} - static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long block_sz, start_pfn; @@ -262,10 +245,6 @@ static int __init pseries_memory_hotplug_init(void) if (firmware_has_feature(FW_FEATURE_LPAR)) of_reconfig_notifier_register(&pseries_mem_nb); -#ifdef CONFIG_MEMORY_HOTREMOVE - ppc_md.remove_memory = pseries_remove_memory; -#endif - return 0; } machine_device_initcall(pseries, pseries_memory_hotplug_init); From 7439b37e7579543226cb30eeefdf989bed391997 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:06 +0200 Subject: [PATCH 007/101] powerpc/8xx: exception InstructionAccess does not exist on MPC8xx Exception InstructionAccess does not exist on MPC8xx. No need to branch there from somewhere else. Handling can be done directly in InstructionTLBError Exception. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fafff8dbd5d9..d30f703790f4 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -234,15 +234,10 @@ DataAccess: EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. - * This is "never generated" by the MPC8xx. We jump to it for other - * translation errors. + * This is "never generated" by the MPC8xx. */ . = 0x400 InstructionAccess: - EXCEPTION_PROLOG - mr r4,r12 - mr r5,r9 - EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -382,7 +377,7 @@ InstructionTLBMiss: #endif mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 - b InstructionAccess + b InstructionTLBError . = 0x1200 DataStoreTLBMiss: @@ -477,7 +472,11 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - b InstructionAccess + EXCEPTION_PROLOG + mr r4,r12 + mr r5,r9 + /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ + EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to From 749137a2516aea627cbdd49140e60bb60d80f18e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:07 +0200 Subject: [PATCH 008/101] powerpc/8xx: DataAccess exception not generated by MPC8xx DataAccess exception is never generated by MPC8xx so do the job directly where it is used to avoid an unnecessary branching. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d30f703790f4..464be24fd3eb 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -219,19 +219,10 @@ MachineCheck: EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. - * This is "never generated" by the MPC8xx. We jump to it for other - * translation errors. + * This is "never generated" by the MPC8xx. */ . = 0x300 DataAccess: - EXCEPTION_PROLOG - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 - mfspr r4,SPRN_DAR - li r10,0x00f0 - mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ - EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. * This is "never generated" by the MPC8xx. @@ -491,7 +482,15 @@ DataTLBError: beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_EPILOG_0 - b DataAccess + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + li r10,0x00f0 + mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ + /* 0x300 is DataAccess exception, needed by bad_page_fault() */ + EXC_XFER_LITE(0x300, handle_page_fault) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) From 6cde2b6f399e7d68a4b482680850a077104f9068 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:08 +0200 Subject: [PATCH 009/101] powerpc/8xx: No need to restore registers and save them again. In DTLBError handler there is not need to restore r10, r11 and cr registers after fixing DAR as they are saved again to the same place just after. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 464be24fd3eb..c061bc8d9045 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -481,8 +481,8 @@ DataTLBError: cmpwi cr0, r11, 0x00f0 beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ - EXCEPTION_EPILOG_0 - EXCEPTION_PROLOG + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) mr r5,r10 From cbc130f120d5e180a70a0fa4a8b4191e44548a87 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:08 +0200 Subject: [PATCH 010/101] powerpc/8xx: Use M_TW instead of M_TWB Use M_TW instead of M_TWB for storing Level 1 table address as M_TWB requires 4k aligned tables, which is only the case with 4k pages. Consequently, we have to calculate the level 1 table index by ourselves. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 48 ++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c061bc8d9045..c78f7fd342cb 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -278,8 +278,8 @@ SystemCall: . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction - * TLB. It is modelled after the example in the Motorola manual. The task - * switch loads the M_TWB register with the pointer to the first level table. + * TLB. The task switch loads the M_TW register with the pointer to the first + * level table. * If we discover there is no second level table (value is zero) or if there * is an invalid pte, we load that into the TLB, which causes another fault * into the TLB Error interrupt where we can handle such problems. @@ -301,7 +301,6 @@ InstructionTLBMiss: #endif DO_8xx_CPU6(0x3780, r3) mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -309,14 +308,17 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ + andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ +#endif + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ +#ifdef CONFIG_MODULES beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -377,18 +379,19 @@ DataStoreTLBMiss: #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andi. r11, r10, 0x0800 + andis. r11, r10, 0x8000 + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -527,12 +530,12 @@ FixupDAR:/* Entry point for dcbx workaround. */ andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ DO_8xx_CPU6(0x3780, r3) mtspr SPRN_MD_EPN, r10 - mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */ + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l - rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */ -3: lwz r11, 0(r11) /* Get the level 1 entry */ +3: rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + lwzx r11, r10, r11 /* Get the level 1 entry */ DO_8xx_CPU6(0x3b80, r3) mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ @@ -541,6 +544,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ lwz r3, 8(r0) /* restore r3 from memory */ #endif /* concat physical page address(r11) and page offset(r10) */ + mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 20, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ @@ -696,11 +700,11 @@ start_here: #ifdef CONFIG_8xx_CPU6 lis r4, cpu6_errata_word@h ori r4, r4, cpu6_errata_word@l - li r3, 0x3980 + li r3, 0x3f80 stw r3, 12(r4) lwz r3, 12(r4) #endif - mtspr SPRN_M_TWB, r6 + mtspr SPRN_M_TW, r6 lis r4,2f@h ori r4,r4,2f@l tophys(r4,r4) @@ -874,10 +878,10 @@ _GLOBAL(set_context) lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l tophys (r4, r4) - li r7, 0x3980 + li r7, 0x3f80 stw r7, 12(r6) lwz r7, 12(r6) - mtspr SPRN_M_TWB, r4 /* Update MMU base address */ + mtspr SPRN_M_TW, r4 /* Update MMU base address */ li r7, 0x3380 stw r7, 12(r6) lwz r7, 12(r6) @@ -885,7 +889,7 @@ _GLOBAL(set_context) #else mtspr SPRN_M_CASID,r3 /* Update context */ tophys (r4, r4) - mtspr SPRN_M_TWB, r4 /* and pgd */ + mtspr SPRN_M_TW, r4 /* and pgd */ #endif SYNC blr From 33fb845a6f019001b8ca3f532eb1a4de34547f42 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:08 +0200 Subject: [PATCH 011/101] powerpc/8xx: Don't use MD_TWC for walk MD_TWC can only be used properly with 4k pages. So lets calculate level 2 table index by ourselves. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c78f7fd342cb..3eea29ad1fcf 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -299,8 +299,6 @@ InstructionTLBMiss: addi r11, r10, -0x1000 tlbie r11 #endif - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -328,10 +326,9 @@ InstructionTLBMiss: ori r11,r11,1 /* Set valid bit */ DO_8xx_CPU6(0x2b80, r3) mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ - lwz r10, 0(r11) /* Get the pte */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11, 22, 20, 29 /* Extract level 2 index */ + lwzx r10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT @@ -397,12 +394,13 @@ DataStoreTLBMiss: /* We have a pte table, so load fetch the pte from the table. */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r10, SPRN_MD_TWC /* ....and get the pte address */ + mfspr r10, SPRN_MD_EPN /* Get address of fault */ + /* Extract level 2 index */ + rlwinm r10, r10, 22, 20, 29 + rlwimi r10, r11, 0, 0, 19 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ + ori r11, r11, 1 /* Set valid bit in physical L2 page */ /* Insert the Guarded flag into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put @@ -528,18 +526,16 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10 mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ lwzx r11, r10, r11 /* Get the level 1 entry */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ - lwz r11, 0(r11) /* Get the pte */ + rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11, 22, 20, 29 /* Extract level 2 index */ + lwzx r11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) /* restore r3 from memory */ #endif From d14068035c3f6fd0d6514e061e4324a277be83e2 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: [PATCH 012/101] powerpc/8xx: Use PAGE size related consts For PAGE size related operations, use PAGE size consts in order to be able to use different page size in the futur. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3eea29ad1fcf..e126adf15528 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -294,9 +294,9 @@ InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 - addi r11, r10, 0x1000 + addi r11, r10, PAGE_SIZE tlbie r11 - addi r11, r10, -0x1000 + addi r11, r10, -PAGE_SIZE tlbie r11 #endif @@ -315,7 +315,8 @@ InstructionTLBMiss: ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -327,7 +328,8 @@ InstructionTLBMiss: DO_8xx_CPU6(0x2b80, r3) mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - rlwinm r11, r11, 22, 20, 29 /* Extract level 2 index */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzx r10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP @@ -387,7 +389,8 @@ DataStoreTLBMiss: lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -396,8 +399,8 @@ DataStoreTLBMiss: */ mfspr r10, SPRN_MD_EPN /* Get address of fault */ /* Extract level 2 index */ - rlwinm r10, r10, 22, 20, 29 - rlwimi r10, r11, 0, 0, 19 /* Add level 2 base */ + rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ ori r11, r11, 1 /* Set valid bit in physical L2 page */ @@ -530,18 +533,20 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l -3: rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + /* Extract level 1 index */ +3: rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - rlwinm r11, r11, 22, 20, 29 /* Extract level 2 index */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzx r11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) /* restore r3 from memory */ #endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 - rlwimi r11, r10, 0, 20, 31 + rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, @@ -917,12 +922,13 @@ set_dec_cpu6: .globl sdata sdata: .globl empty_zero_page + .align PAGE_SHIFT empty_zero_page: - .space 4096 + .space PAGE_SIZE .globl swapper_pg_dir swapper_pg_dir: - .space 4096 + .space PGD_TABLE_SIZE /* Room for two PTE table poiners, usually the kernel and current user * pointer to their respective root page table (pgdir). From ac21951fa8a356e2aab6e93a61aa99b561100e67 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: [PATCH 013/101] powerpc/8xx: Const for TLB RPN forced value Value 0x00f0 is used to force bits in TLB level 2 entry. This value is linked to the page size and will vary when we change the page size. Lets define a const for it in order to have it at only one place. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e126adf15528..38efa8622176 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -40,6 +40,13 @@ #else #define DO_8xx_CPU6(val, reg) #endif + +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#define RPN_PATTERN 0x00f0 + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -211,7 +218,7 @@ MachineCheck: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -239,7 +246,7 @@ Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -343,7 +350,7 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - li r11, 0x00f0 + li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ @@ -447,7 +454,7 @@ DataStoreTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, 0x00f0 +2: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -482,7 +489,7 @@ DataTLBError: EXCEPTION_PROLOG_0 mfspr r11, SPRN_DAR - cmpwi cr0, r11, 0x00f0 + cmpwi cr0, r11, RPN_PATTERN beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 @@ -491,7 +498,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ stw r10,_DSISR(r11) mr r5,r10 mfspr r4,SPRN_DAR - li r10,0x00f0 + li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) From 959d6173b5cccceff47cc2d25feeaac2f96df0e0 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: [PATCH 014/101] powerpc/8xx: Implement 16k pages This patch activates the handling of 16k pages on the MPC8xx. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/mmu-8xx.h | 2 ++ arch/powerpc/kernel/head_8xx.S | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 88eace4e28c3..abb8709d8b2a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -549,7 +549,7 @@ config PPC_4K_PAGES bool "4k page size" config PPC_16K_PAGES - bool "16k page size" if 44x + bool "16k page size" if 44x || PPC_8xx config PPC_64K_PAGES bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64 diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 3d11d3ce79ec..986b9e1e1044 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -56,6 +56,7 @@ * additional information from the MI_EPN, and MI_TWC registers. */ #define SPRN_MI_RPN 790 +#define MI_SPS16K 0x00000008 /* Small page size (0 = 4k, 1 = 16k) */ /* Define an RPN value for mapping kernel memory to large virtual * pages for boot initialization. This has real page number of 0, @@ -129,6 +130,7 @@ * additional information from the MD_EPN, and MD_TWC registers. */ #define SPRN_MD_RPN 798 +#define MD_SPS16K 0x00000008 /* Small page size (0 = 4k, 1 = 16k) */ /* This is a temporary storage register that could be used to save * a processor working register during a tablewalk. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 38efa8622176..84b0b9778065 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -45,7 +45,11 @@ * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. */ +#ifdef CONFIG_PPC_16K_PAGES +#define RPN_PATTERN (0x00f0 | MD_SPS16K) +#else #define RPN_PATTERN 0x00f0 +#endif __HEAD _ENTRY(_stext); From d3e40262e7d05236bf4c2c4fdf007589ba8af97a Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: [PATCH 015/101] powerpc/8xx: Better readibility of ERRATA CPU6 handling This patch hiddes that SPR address needed for CPU6 ERRATA handling in the macro. Then we don't have to worry about this address directly in the code. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 84b0b9778065..6e9124177f19 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,12 +33,19 @@ /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 -#define DO_8xx_CPU6(val, reg) \ - li reg, val; \ - stw reg, 12(r0); \ - lwz reg, 12(r0); +#define SPRN_MI_TWC_ADDR 0x2b80 +#define SPRN_MI_RPN_ADDR 0x2d80 +#define SPRN_MD_TWC_ADDR 0x3b80 +#define SPRN_MD_RPN_ADDR 0x3d80 + +#define MTSPR_CPU6(spr, reg, treg) \ + li treg, spr##_ADDR; \ + stw treg, 12(r0); \ + lwz treg, 12(r0); \ + mtspr spr, reg #else -#define DO_8xx_CPU6(val, reg) +#define MTSPR_CPU6(spr, reg, treg) \ + mtspr spr, reg #endif /* @@ -336,8 +343,7 @@ InstructionTLBMiss: * for this "segment." */ ori r11,r11,1 /* Set valid bit */ - DO_8xx_CPU6(0x2b80, r3) - mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -356,8 +362,7 @@ InstructionTLBMiss: */ li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ - DO_8xx_CPU6(0x2d80, r3) - mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ + MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 @@ -426,8 +431,7 @@ DataStoreTLBMiss: * It is bit 25 in the Linux PTE and bit 30 in the TWC */ rlwimi r11, r10, 32-5, 30, 30 - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 + MTSPR_CPU6(SPRN_MD_TWC, r11, r3) /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: @@ -460,8 +464,7 @@ DataStoreTLBMiss: */ 2: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ - DO_8xx_CPU6(0x3d80, r3) - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 From 4094f28f90adab007eca9babf28f606a40a83032 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: [PATCH 016/101] powerpc/8xx: set PTE bit 22 off TLBmiss No need to re-set this bit at each TLB miss. Let's set it in the PTE. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pgtable-ppc32.h | 20 ++++++++++++++++++++ arch/powerpc/include/asm/pte-8xx.h | 7 +++++-- arch/powerpc/kernel/head_8xx.S | 10 ++-------- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 945e47adf7db..234e07c47803 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -170,6 +170,25 @@ static inline unsigned long pte_update(pte_t *p, #ifdef PTE_ATOMIC_UPDATES unsigned long old, tmp; +#ifdef CONFIG_PPC_8xx + unsigned long tmp2; + + __asm__ __volatile__("\ +1: lwarx %0,0,%4\n\ + andc %1,%0,%5\n\ + or %1,%1,%6\n\ + /* 0x200 == Extended encoding, bit 22 */ \ + /* Bit 22 has to be 1 if neither _PAGE_USER nor _PAGE_RW are set */ \ + rlwimi %1,%1,32-2,0x200\n /* get _PAGE_USER */ \ + rlwinm %3,%1,32-1,0x200\n /* get _PAGE_RW */ \ + or %1,%3,%1\n\ + xori %1,%1,0x200\n" +" stwcx. %1,0,%4\n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2) + : "r" (p), "r" (clr), "r" (set), "m" (*p) + : "cc" ); +#else /* CONFIG_PPC_8xx */ __asm__ __volatile__("\ 1: lwarx %0,0,%3\n\ andc %1,%0,%4\n\ @@ -180,6 +199,7 @@ static inline unsigned long pte_update(pte_t *p, : "=&r" (old), "=&r" (tmp), "=m" (*p) : "r" (p), "r" (clr), "r" (set), "m" (*p) : "cc" ); +#endif /* CONFIG_PPC_8xx */ #else /* PTE_ATOMIC_UPDATES */ unsigned long old = pte_val(*p); *p = __pte((old & ~clr) | set); diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index d44826e4ff97..daa4616e61c4 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -48,19 +48,22 @@ */ #define _PAGE_RW 0x0400 /* lsb PP bits, inverted in HW */ #define _PAGE_USER 0x0800 /* msb PP bits */ +/* set when neither _PAGE_USER nor _PAGE_RW are set */ +#define _PAGE_KNLRO 0x0200 #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c -#define _PTE_NONE_MASK _PAGE_ACCESSED +#define _PTE_NONE_MASK _PAGE_KNLRO /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED) +#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_KNLRO) #define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6e9124177f19..8d6e6830a675 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -447,14 +447,8 @@ DataStoreTLBMiss: and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif - /* Honour kernel RO, User NA */ - /* 0x200 == Extended encoding, bit 22 */ - rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ - /* r11 = (r10 & _PAGE_RW) >> 1 */ - rlwinm r11, r10, 32-1, 0x200 - or r10, r11, r10 - /* invert RW and 0x200 bits */ - xori r10, r10, _PAGE_RW | 0x200 + /* invert RW */ + xori r10, r10, _PAGE_RW /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 22 and 28 must be clear. From c9a803fb17bcec0e7527dc8fa055e56a9691abbb Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: [PATCH 017/101] powerpc/8xx: _PMD_PRESENT already set in level 1 entries When a PMD entry is valid, _PMD_PRESENT is set. Therefore, forcing that bit during TLB loading is useless. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 8d6e6830a675..46b47e1fe2a9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -342,7 +342,6 @@ InstructionTLBMiss: /* We have a pte table, so load the MI_TWC with the attributes * for this "segment." */ - ori r11,r11,1 /* Set valid bit */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ @@ -419,7 +418,6 @@ DataStoreTLBMiss: rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ /* Insert the Guarded flag into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put From b0168eb97b8b02594f47ce44faf1502f79e540df Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:10 +0200 Subject: [PATCH 018/101] powerpc/8xx: Don't restore regs to save them again. There is not need to restore r10, r11 and cr registers at this end of ITLBmiss handler as they are saved again to the same place in ITLBError handler we are jumping to. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 46b47e1fe2a9..330d54418494 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -383,8 +383,7 @@ InstructionTLBMiss: lwz r3, 8(r0) #endif mfspr r10, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 - b InstructionTLBError + b InstructionTLBError1 . = 0x1200 DataStoreTLBMiss: @@ -473,7 +472,10 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - EXCEPTION_PROLOG + EXCEPTION_PROLOG_0 +InstructionTLBError1: + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 mr r4,r12 mr r5,r9 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ From 83c17ba35e0306e671b5c9ab622535f23a9a3e78 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:10 +0200 Subject: [PATCH 019/101] powerpc/8xx: Use DAR to save r3 for CPU6 ERRATA As we are not using anymore DAR to save registers, it is now available for saving the r3 register used for CPU6 ERRATA handling. Therefore we can remove the major hack which was to use memory location 0 to save r3. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 330d54418494..acf6d7eab6d5 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -83,13 +83,6 @@ _ENTRY(_start); * 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to * the "internal" processor registers before MMU_init is called. * - * The TLB code currently contains a major hack. Since I use the condition - * code register, I have to save and restore it. I am out of registers, so - * I just store it in memory location 0 (the TLB handlers are not reentrant). - * To avoid making any decisions, I need to use the "segment" valid bit - * in the first level table, but that would require many changes to the - * Linux page directory/table functions that I don't want to do right now. - * * -- Dan */ .globl __start @@ -306,7 +299,7 @@ SystemCall: */ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 @@ -351,7 +344,10 @@ InstructionTLBMiss: #ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT + li r11, RPN_PATTERN bne- cr0, 2f +#else + li r11, RPN_PATTERN #endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 21 and 28 must be clear. @@ -359,28 +355,29 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 rfi 2: - mfspr r11, SPRN_SRR1 + mfspr r10, SPRN_SRR1 /* clear all error bits as TLB Miss * sets a few unconditionally */ - rlwinm r11, r11, 0, 0xffff - mtspr SPRN_SRR1, r11 + rlwinm r10, r10, 0, 0xffff + mtspr SPRN_SRR1, r10 /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 b InstructionTLBError1 @@ -388,7 +385,7 @@ InstructionTLBMiss: . = 0x1200 DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 @@ -459,7 +456,7 @@ DataStoreTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 @@ -531,7 +528,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ @@ -550,7 +547,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzx r11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ + mfspr r3, SPRN_DAR #endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 From c51a6821bdbc9068adda93b3b8ee65df8e4642a6 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:10 +0200 Subject: [PATCH 020/101] powerpc/8xx: Invalidate non present TLB as early as possible 8xx sometimes need to load a invalid/non-present TLBs in it DTLB asm handler. These must be invalidated separaly as linux mm doesn't. Commit 5efab4a02c89c252fb4cce097aafde5f8208dbfe was invalidating them in arch/powerpc/mm/fault.c. This patch does the invalidation earlier in order to free the TLB as soon as possible. This also has the advantage of removing some 8xx specific code from fault.c Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 15 ++++++++++----- arch/powerpc/mm/fault.c | 7 ------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index acf6d7eab6d5..d99aac0d69f1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -475,8 +475,11 @@ InstructionTLBError1: EXCEPTION_PROLOG_2 mr r4,r12 mr r5,r9 + andis. r10,r5,0x4000 + beq+ 1f + tlbie r4 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ - EXC_XFER_LITE(0x400, handle_page_fault) +1: EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -492,11 +495,13 @@ DataTLBError: DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) mfspr r4,SPRN_DAR - li r10,RPN_PATTERN + andis. r10,r5,0x4000 + beq+ 1f + tlbie r4 +1: li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 08d659a9fcdb..eb79907f34fa 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -43,7 +43,6 @@ #include #include #include -#include #include "icswx.h" @@ -380,12 +379,6 @@ good_area: goto bad_area; #endif /* CONFIG_6xx */ #if defined(CONFIG_8xx) - /* 8xx sometimes need to load a invalid/non-present TLBs. - * These must be invalidated separately as linux mm don't. - */ - if (error_code & 0x40000000) /* no translation? */ - _tlbil_va(address, 0, 0, 0); - /* The MPC8xx seems to always set 0x80000000, which is * "undefined". Of those that can be set, this is the only * one which seems bad. From 6f2ce34dd7a2cd34b937a149e5513edd9d9ba724 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 24 Sep 2014 10:06:19 +0200 Subject: [PATCH 021/101] powerpc/8xx: Remove Kconfig symbol FADS Commit 39eb56da2b53 ("pcmcia: Remove m8xx_pcmcia driver") removed the only driver that used CONFIG_FADS. Setting the Kconfig symbol FADS is pointless since that commit. Remove it. Signed-off-by: Paul Bolle Signed-off-by: Scott Wood --- arch/powerpc/platforms/8xx/Kconfig | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index bd6f1a1cf922..157250426b56 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -1,6 +1,3 @@ -config FADS - bool - config CPM1 bool select CPM @@ -13,7 +10,6 @@ choice config MPC8XXFADS bool "FADS" - select FADS config MPC86XADS bool "MPC86XADS" From 45c22ed7446fd6acdaaac673ccf5b63862a3e988 Mon Sep 17 00:00:00 2001 From: Ashish Kumar Date: Tue, 7 Oct 2014 18:04:36 +0530 Subject: [PATCH 022/101] powerpc/mpc85xx: Remove SPI and NAND partition from bsc9131rdb.dtsi * Run "mtdparts default" on u-boot to create dynamic partitions * Or use dynamic mtd partition with the help of bootargs in u-boot Append bootargs with: "mtdparts=ff800000.flash:1m(nand_uboot),512K(nand_dtb),8m(nand_kernel),-(fs);\ spiff707000.0:1m(spi_uboot),4m(spi_kernel),512k(spi_dtb),-(fs)'" Signed-off-by: Ashish Kumar Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/bsc9131rdb.dtsi | 50 --------------------------- 1 file changed, 50 deletions(-) diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi b/arch/powerpc/boot/dts/bsc9131rdb.dtsi index 9e6c01339ccc..45efcbadb23c 100644 --- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi +++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi @@ -40,31 +40,6 @@ compatible = "fsl,ifc-nand"; reg = <0x0 0x0 0x4000>; - partition@0 { - /* This location must not be altered */ - /* 3MB for u-boot Bootloader Image */ - reg = <0x0 0x00300000>; - label = "NAND U-Boot Image"; - read-only; - }; - - partition@300000 { - /* 1MB for DTB Image */ - reg = <0x00300000 0x00100000>; - label = "NAND DTB Image"; - }; - - partition@400000 { - /* 8MB for Linux Kernel Image */ - reg = <0x00400000 0x00800000>; - label = "NAND Linux Kernel Image"; - }; - - partition@c00000 { - /* Rest space for Root file System Image */ - reg = <0x00c00000 0x07400000>; - label = "NAND RFS Image"; - }; }; }; @@ -82,31 +57,6 @@ reg = <0>; spi-max-frequency = <50000000>; - /* 512KB for u-boot Bootloader Image */ - partition@0 { - reg = <0x0 0x00080000>; - label = "SPI Flash U-Boot Image"; - read-only; - }; - - /* 512KB for DTB Image */ - partition@80000 { - reg = <0x00080000 0x00080000>; - label = "SPI Flash DTB Image"; - }; - - /* 4MB for Linux Kernel Image */ - partition@100000 { - reg = <0x00100000 0x00400000>; - label = "SPI Flash Kernel Image"; - }; - - /*11MB for RFS Image */ - partition@500000 { - reg = <0x00500000 0x00B00000>; - label = "SPI Flash RFS Image"; - }; - }; }; From 19bc4808f9ecdef9ca3f3b3807a5228b00d74f83 Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Thu, 30 Oct 2014 11:15:47 +0200 Subject: [PATCH 023/101] powerpc/fsl: Added rcw registers to global utility registers The RCW registers are required for the future clock binding implementation. Signed-off-by: Igal Liberman Change-Id: Ic36dd8bc2959aa7f97fb6fd7bbb8420822fef0a9 Signed-off-by: Scott Wood --- arch/powerpc/include/asm/fsl_guts.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h index 77ced0b3d81d..43b6bb1a4a9c 100644 --- a/arch/powerpc/include/asm/fsl_guts.h +++ b/arch/powerpc/include/asm/fsl_guts.h @@ -68,7 +68,10 @@ struct ccsr_guts { u8 res0b4[0xc0 - 0xb4]; __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register Called 'elbcvselcr' on 86xx SOCs */ - u8 res0c4[0x224 - 0xc4]; + u8 res0c4[0x100 - 0xc4]; + __be32 rcwsr[16]; /* 0x.0100 - Reset Control Word Status registers + There are 16 registers */ + u8 res140[0x224 - 0x140]; __be32 iodelay1; /* 0x.0224 - IO delay control register 1 */ __be32 iodelay2; /* 0x.0228 - IO delay control register 2 */ u8 res22c[0x604 - 0x22c]; From 3b6b17900bfa3c1929741e720495beaa559b2aff Mon Sep 17 00:00:00 2001 From: Hongtao Jia Date: Wed, 5 Nov 2014 14:59:52 +0800 Subject: [PATCH 024/101] powerpc: Add ADT7461 to device tree for supported boards Including: T104xRDB T208xQDS B4QDS Signed-off-by: Jia Hongtao Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/b4qds.dtsi | 11 +++++++++++ arch/powerpc/boot/dts/t104xrdb.dtsi | 7 +++++++ arch/powerpc/boot/dts/t208xqds.dtsi | 11 +++++++++++ 3 files changed, 29 insertions(+) diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi index 8b47edcfabf0..bccc9867bc6b 100644 --- a/arch/powerpc/boot/dts/b4qds.dtsi +++ b/arch/powerpc/boot/dts/b4qds.dtsi @@ -152,6 +152,17 @@ reg = <0x68>; }; }; + + i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x3>; + + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; }; }; diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi index 1cf0f3c5f7e5..187add885cae 100644 --- a/arch/powerpc/boot/dts/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/t104xrdb.dtsi @@ -83,6 +83,13 @@ }; }; + i2c@118000 { + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; + i2c@118100 { pca9546@77 { compatible = "nxp,pca9546"; diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi index 555dc6e03d89..59061834d54e 100644 --- a/arch/powerpc/boot/dts/t208xqds.dtsi +++ b/arch/powerpc/boot/dts/t208xqds.dtsi @@ -169,6 +169,17 @@ shunt-resistor = <1000>; }; }; + + i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x3>; + + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; }; }; From 94701fcb2f085cef29b29051f21de7fc3718217a Mon Sep 17 00:00:00 2001 From: Hongtao Jia Date: Wed, 5 Nov 2014 14:59:53 +0800 Subject: [PATCH 025/101] powerpc: Add INA220 to device tree for supported boards Including: P3041DS P5020DS P5040DS B4QDS Signed-off-by: Jia Hongtao Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/b4qds.dtsi | 12 ++++++++++++ arch/powerpc/boot/dts/p3041ds.dts | 20 ++++++++++++++++++++ arch/powerpc/boot/dts/p5020ds.dts | 20 ++++++++++++++++++++ arch/powerpc/boot/dts/p5040ds.dts | 20 ++++++++++++++++++++ 4 files changed, 72 insertions(+) diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi index bccc9867bc6b..e5bde0b85135 100644 --- a/arch/powerpc/boot/dts/b4qds.dtsi +++ b/arch/powerpc/boot/dts/b4qds.dtsi @@ -153,6 +153,18 @@ }; }; + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x2>; + + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + }; + i2c@3 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts index 2fed3bc0b990..394ea9c943c9 100644 --- a/arch/powerpc/boot/dts/p3041ds.dts +++ b/arch/powerpc/boot/dts/p3041ds.dts @@ -98,6 +98,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts index 2869fea717dd..b7f3057cd894 100644 --- a/arch/powerpc/boot/dts/p5020ds.dts +++ b/arch/powerpc/boot/dts/p5020ds.dts @@ -98,6 +98,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts index 860b5ccf76c0..7e04bf487c04 100644 --- a/arch/powerpc/boot/dts/p5040ds.dts +++ b/arch/powerpc/boot/dts/p5040ds.dts @@ -95,6 +95,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; From eaffcb0f1bebbcfd38ecc9bdca105f7123115ab1 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Thu, 6 Nov 2014 09:48:11 -0600 Subject: [PATCH 026/101] powerpc/dts: Factorize the clock control node Signed-off-by: Emil Medve Change-Id: I25ce24a25862b4ca460164159867abefe00ccdd1 Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/b4860emu.dts | 4 +- arch/powerpc/boot/dts/fsl/b4420si-post.dtsi | 28 +------ arch/powerpc/boot/dts/fsl/b4860si-post.dtsi | 28 +------ arch/powerpc/boot/dts/fsl/p2041si-post.dtsi | 48 +----------- arch/powerpc/boot/dts/fsl/p3041si-post.dtsi | 48 +----------- arch/powerpc/boot/dts/fsl/p4080si-post.dtsi | 48 +----------- arch/powerpc/boot/dts/fsl/p5020si-post.dtsi | 48 +----------- arch/powerpc/boot/dts/fsl/p5040si-post.dtsi | 48 +----------- .../powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi | 78 +++++++++++++++++++ .../powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi | 61 +++++++++++++++ arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 30 +------ arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 29 +------ arch/powerpc/boot/dts/fsl/t4240si-post.dtsi | 29 +------ arch/powerpc/boot/dts/t4240emu.dts | 4 +- 14 files changed, 163 insertions(+), 368 deletions(-) create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi diff --git a/arch/powerpc/boot/dts/b4860emu.dts b/arch/powerpc/boot/dts/b4860emu.dts index 85646b4f96e1..2aa5cd318ce8 100644 --- a/arch/powerpc/boot/dts/b4860emu.dts +++ b/arch/powerpc/boot/dts/b4860emu.dts @@ -193,9 +193,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "fsl/qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0"; - reg = <0xe1000 0x1000>; }; /include/ "fsl/qoriq-dma-0.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi index d67894459ac8..86161ae6c966 100644 --- a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi @@ -80,33 +80,9 @@ compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0"; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4420-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi index 582381dba1d7..65100b9636b7 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi @@ -124,33 +124,9 @@ compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0"; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 69ce1026c948..efd74db4f9b0 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -305,53 +305,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p2041-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi index cd63cb1b1042..d7425ef1ae41 100644 --- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi @@ -332,53 +332,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p3041-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi index 12947ccddf25..7005a4a4cef0 100644 --- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi @@ -352,35 +352,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p4080-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; pll2: pll2@840 { #clock-cells = <1>; @@ -398,24 +372,6 @@ clock-output-names = "pll3", "pll3-div2"; }; - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; - mux2: mux2@40 { #clock-cells = <0>; reg = <0x40 0x4>; diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi index 4c4a2b0436b2..55834211bd28 100644 --- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi @@ -337,53 +337,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; }; rcpm: global-utilities@e2000 { diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi index 67296fdd9698..6e4cd6ce363c 100644 --- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi @@ -297,53 +297,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p5040-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi new file mode 100644 index 000000000000..48710482806e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi @@ -0,0 +1,78 @@ +/* + * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ] + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +global-utilities@e1000 { + compatible = "fsl,qoriq-clockgen-1.0"; + ranges = <0x0 0xe1000 0x1000>; + reg = <0xe1000 0x1000>; + clock-frequency = <0>; + #address-cells = <1>; + #size-cells = <1>; + + sysclk: sysclk { + #clock-cells = <0>; + compatible = "fsl,qoriq-sysclk-1.0", "fixed-clock"; + clock-output-names = "sysclk"; + }; + pll0: pll0@800 { + #clock-cells = <1>; + reg = <0x800 0x4>; + compatible = "fsl,qoriq-core-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "pll0", "pll0-div2"; + }; + pll1: pll1@820 { + #clock-cells = <1>; + reg = <0x820 0x4>; + compatible = "fsl,qoriq-core-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "pll1", "pll1-div2"; + }; + mux0: mux0@0 { + #clock-cells = <0>; + reg = <0x0 0x4>; + compatible = "fsl,qoriq-core-mux-1.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; + clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; + clock-output-names = "cmux0"; + }; + mux1: mux1@20 { + #clock-cells = <0>; + reg = <0x20 0x4>; + compatible = "fsl,qoriq-core-mux-1.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; + clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; + clock-output-names = "cmux1"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi new file mode 100644 index 000000000000..5d18d2a6cf52 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi @@ -0,0 +1,61 @@ +/* + * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ] + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +global-utilities@e1000 { + compatible = "fsl,qoriq-clockgen-2.0"; + ranges = <0x0 0xe1000 0x1000>; + reg = <0xe1000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + + sysclk: sysclk { + #clock-cells = <0>; + compatible = "fsl,qoriq-sysclk-2.0", "fixed-clock"; + clock-output-names = "sysclk"; + }; + pll0: pll0@800 { + #clock-cells = <1>; + reg = <0x800 0x4>; + compatible = "fsl,qoriq-core-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "pll0", "pll0-div2", "pll0-div4"; + }; + pll1: pll1@820 { + #clock-cells = <1>; + reg = <0x820 0x4>; + compatible = "fsl,qoriq-core-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "pll1", "pll1-div2", "pll1-div4"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 12e597eea3c8..15ae462e758f 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -281,35 +281,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t1040-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk", "fixed-clock"; - }; - - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index aecee9690a88..1ce91e3485a9 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -305,34 +305,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t2080-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk", "fixed-clock"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index 7e2fc7cdce48..0e96fcabe812 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -368,34 +368,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; pll2: pll2@840 { #clock-cells = <1>; diff --git a/arch/powerpc/boot/dts/t4240emu.dts b/arch/powerpc/boot/dts/t4240emu.dts index bc12127a03fb..decaf357db9c 100644 --- a/arch/powerpc/boot/dts/t4240emu.dts +++ b/arch/powerpc/boot/dts/t4240emu.dts @@ -250,9 +250,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "fsl/qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0"; - reg = <0xe1000 0x1000>; }; /include/ "fsl/qoriq-dma-0.dtsi" From f1aa77c9703148fad7d819d9d764dae0cb82d141 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Thu, 6 Nov 2014 09:48:12 -0600 Subject: [PATCH 027/101] dt/bindings: qoriq-clock: Add binding for the platform PLL Signed-off-by: Emil Medve Change-Id: I7950afa9650d15ec7ce2cca89bb2a1e38586d4a5 Signed-off-by: Scott Wood --- .../devicetree/bindings/clock/qoriq-clock.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/qoriq-clock.txt b/Documentation/devicetree/bindings/clock/qoriq-clock.txt index 5666812fc42b..266ff9d23229 100644 --- a/Documentation/devicetree/bindings/clock/qoriq-clock.txt +++ b/Documentation/devicetree/bindings/clock/qoriq-clock.txt @@ -62,6 +62,8 @@ Required properties: It takes parent's clock-frequency as its clock. * "fsl,qoriq-sysclk-2.0": for input system clock (v2.0). It takes parent's clock-frequency as its clock. + * "fsl,qoriq-platform-pll-1.0" for the platform PLL clock (v1.0) + * "fsl,qoriq-platform-pll-2.0" for the platform PLL clock (v2.0) - #clock-cells: From common clock binding. The number of cells in a clock-specifier. Should be <0> for "fsl,qoriq-sysclk-[1,2].0" clocks, or <1> for "fsl,qoriq-core-pll-[1,2].0" clocks. @@ -128,8 +130,16 @@ Example for clock block and clock provider: clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; clock-output-names = "cmux1"; }; + + platform-pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; }; - } +}; Example for clock consumer: @@ -139,4 +149,4 @@ Example for clock consumer: clocks = <&mux0>; ... }; - } +}; From 58810cb7f66e47fce2e8945deeab5a4226e3975c Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Thu, 6 Nov 2014 09:48:13 -0600 Subject: [PATCH 028/101] powerpc/dts: Add node(s) for the platform PLL Signed-off-by: Emil Medve Change-Id: If76cd705a01813abe53396c1486bc13c4289ee92 Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi | 7 +++++++ arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi index 48710482806e..4ece1edbff63 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi @@ -75,4 +75,11 @@ global-utilities@e1000 { clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; clock-output-names = "cmux1"; }; + platform_pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; }; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi index 5d18d2a6cf52..48e0b6e4ce33 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi @@ -58,4 +58,11 @@ global-utilities@e1000 { clocks = <&sysclk>; clock-output-names = "pll1", "pll1-div2", "pll1-div4"; }; + platform_pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; }; From 10239733ee8617bac3f1c1769af43a88ed979324 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:33 +1000 Subject: [PATCH 029/101] powerpc: Remove bootmem allocator At the moment we transition from the memblock alloctor to the bootmem allocator. Gitting rid of the bootmem allocator removes a bunch of complicated code (most of which I owe the dubious honour of being responsible for writing). Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/setup.h | 3 +- arch/powerpc/kernel/setup_32.c | 5 +- arch/powerpc/kernel/setup_64.c | 3 +- arch/powerpc/mm/init_32.c | 9 - arch/powerpc/mm/mem.c | 62 +----- arch/powerpc/mm/numa.c | 216 +++---------------- arch/powerpc/mm/pgtable_32.c | 3 +- arch/powerpc/mm/pgtable_64.c | 6 +- arch/powerpc/platforms/512x/mpc512x_shared.c | 9 +- 10 files changed, 43 insertions(+), 274 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 88eace4e28c3..98e9c548bd75 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -148,6 +148,7 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select NO_BOOTMEM config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 11ba86e17631..fbdf18cf954c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -8,7 +8,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern int mem_init_done; /* set on boot once kmalloc can be called */ -extern int init_bootmem_done; /* set once bootmem is available */ extern unsigned long long memory_limit; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); @@ -24,7 +23,7 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) void check_for_initrd(void); -void do_init_bootmem(void); +void initmem_init(void); void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 07831ed0d9ef..88a36e64bf9b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -311,9 +311,8 @@ void __init setup_arch(char **cmdline_p) irqstack_early_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); + initmem_init(); + if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 615aa904b216..91d9cfaffe6c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -689,8 +689,7 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); + initmem_init(); sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 415a51b028b9..3b3100433c18 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -195,15 +195,6 @@ void __init MMU_init(void) memblock_set_current_limit(lowmem_end_addr); } -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - if (init_bootmem_done) - return alloc_bootmem_pages(PAGE_SIZE); - else - return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); -} - #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2add0b7b9f6d..e076d19c095b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -61,7 +61,6 @@ #define CPU_FTR_NOEXECUTE 0 #endif -int init_bootmem_done; int mem_init_done; unsigned long long memory_limit; @@ -190,70 +189,22 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, } EXPORT_SYMBOL_GPL(walk_system_ram_range); -/* - * Initialize the bootmem system and give it all the memory we - * have available. If we are using highmem, we only put the - * lowmem into the bootmem system. - */ #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init do_init_bootmem(void) +void __init initmem_init(void) { - unsigned long start, bootmap_pages; - unsigned long total_pages; - struct memblock_region *reg; - int boot_mapsize; - max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; - total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; + min_low_pfn = MEMORY_START >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM - total_pages = total_lowmem >> PAGE_SHIFT; max_low_pfn = lowmem_end_addr >> PAGE_SHIFT; #endif - /* - * Find an area to use for the bootmem bitmap. Calculate the size of - * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. - * Add 1 additional page in case the address isn't page-aligned. - */ - bootmap_pages = bootmem_bootmap_pages(total_pages); - - start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); - - min_low_pfn = MEMORY_START >> PAGE_SHIFT; - boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); - /* Place all memblock_regions in the same node and merge contiguous * memblock_regions */ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); - /* Add all physical memory to the bootmem map, mark each area - * present. - */ -#ifdef CONFIG_HIGHMEM - free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) { - unsigned long top = reg->base + reg->size - 1; - if (top < lowmem_end_addr) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - else if (reg->base < lowmem_end_addr) { - unsigned long trunc_size = lowmem_end_addr - reg->base; - reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); - } - } -#else - free_bootmem_with_active_regions(0, max_pfn); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); -#endif /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); - - init_bootmem_done = 1; } /* mark pages that don't exist as nosave */ @@ -369,14 +320,6 @@ void __init paging_init(void) mark_nonram_nosave(); } -static void __init register_page_bootmem_info(void) -{ - int i; - - for_each_online_node(i) - register_page_bootmem_info_node(NODE_DATA(i)); -} - void __init mem_init(void) { /* @@ -389,7 +332,6 @@ void __init mem_init(void) swiotlb_init(0); #endif - register_page_bootmem_info(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); free_all_bootmem(); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b9d1dfdbe5bb..2e9ad2d76b75 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } -/* - * get_node_active_region - Return active region containing pfn - * Active range returned is empty if none found. - * @pfn: The page to return the region for - * @node_ar: Returned set to the active region containing @pfn - */ -static void __init get_node_active_region(unsigned long pfn, - struct node_active_region *node_ar) -{ - unsigned long start_pfn, end_pfn; - int i, nid; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - if (pfn >= start_pfn && pfn < end_pfn) { - node_ar->nid = nid; - node_ar->start_pfn = start_pfn; - node_ar->end_pfn = end_pfn; - break; - } - } -} - static void reset_numa_cpu_lookup_table(void) { unsigned int cpu; @@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void) } } -/* - * Allocate some memory, satisfying the memblock or bootmem allocator where - * required. nid is the preferred node and end is the physical address of - * the highest address in the node. - * - * Returns the virtual address of the memory. - */ -static void __init *careful_zallocation(int nid, unsigned long size, - unsigned long align, - unsigned long end_pfn) -{ - void *ret; - int new_nid; - unsigned long ret_paddr; - - ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); - - /* retry over all memory */ - if (!ret_paddr) - ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); - - if (!ret_paddr) - panic("numa.c: cannot allocate %lu bytes for node %d", - size, nid); - - ret = __va(ret_paddr); - - /* - * We initialize the nodes in numeric order: 0, 1, 2... - * and hand over control from the MEMBLOCK allocator to the - * bootmem allocator. If this function is called for - * node 5, then we know that all nodes <5 are using the - * bootmem allocator instead of the MEMBLOCK allocator. - * - * So, check the nid from which this allocation came - * and double check to see if we need to use bootmem - * instead of the MEMBLOCK. We don't free the MEMBLOCK memory - * since it would be useless. - */ - new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); - if (new_nid < nid) { - ret = __alloc_bootmem_node(NODE_DATA(new_nid), - size, align, 0); - - dbg("alloc_bootmem %p %lx\n", ret, size); - } - - memset(ret, 0, size); - return ret; -} - static struct notifier_block ppc64_numa_nb = { .notifier_call = cpu_numa_callback, .priority = 1 /* Must run before sched domains notifier. */ }; -static void __init mark_reserved_regions_for_nid(int nid) +/* Initialize NODE_DATA for a node on the local memory */ +static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { - struct pglist_data *node = NODE_DATA(nid); - struct memblock_region *reg; + u64 spanned_pages = end_pfn - start_pfn; + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; - for_each_memblock(reserved, reg) { - unsigned long physbase = reg->base; - unsigned long size = reg->size; - unsigned long start_pfn = physbase >> PAGE_SHIFT; - unsigned long end_pfn = PFN_UP(physbase + size); - struct node_active_region node_ar; - unsigned long node_end_pfn = pgdat_end_pfn(node); + if (spanned_pages) + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, + (end_pfn << PAGE_SHIFT) - 1); + else + pr_info("Initmem setup node %d\n", nid); - /* - * Check to make sure that this memblock.reserved area is - * within the bounds of the node that we care about. - * Checking the nid of the start and end points is not - * sufficient because the reserved area could span the - * entire node. - */ - if (end_pfn <= node->node_start_pfn || - start_pfn >= node_end_pfn) - continue; + nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd = __va(nd_pa); - get_node_active_region(start_pfn, &node_ar); - while (start_pfn < end_pfn && - node_ar.start_pfn < node_ar.end_pfn) { - unsigned long reserve_size = size; - /* - * if reserved region extends past active region - * then trim size to active region - */ - if (end_pfn > node_ar.end_pfn) - reserve_size = (node_ar.end_pfn << PAGE_SHIFT) - - physbase; - /* - * Only worry about *this* node, others may not - * yet have valid NODE_DATA(). - */ - if (node_ar.nid == nid) { - dbg("reserve_bootmem %lx %lx nid=%d\n", - physbase, reserve_size, node_ar.nid); - reserve_bootmem_node(NODE_DATA(node_ar.nid), - physbase, reserve_size, - BOOTMEM_DEFAULT); - } - /* - * if reserved region is contained in the active region - * then done. - */ - if (end_pfn <= node_ar.end_pfn) - break; + /* report and initialize */ + pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != nid) + pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); - /* - * reserved region extends past the active region - * get next active region that contains this - * reserved region - */ - start_pfn = node_ar.end_pfn; - physbase = start_pfn << PAGE_SHIFT; - size = size - reserve_size; - get_node_active_region(start_pfn, &node_ar); - } - } + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = spanned_pages; } - -void __init do_init_bootmem(void) +void __init initmem_init(void) { int nid, cpu; - min_low_pfn = 0; max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn; @@ -1064,64 +956,16 @@ void __init do_init_bootmem(void) else dump_numa_memory_topology(); + memblock_dump_all(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; - void *bootmem_vaddr; - unsigned long bootmap_pages; get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); - - /* - * Allocate the node structure node local if possible - * - * Be careful moving this around, as it relies on all - * previous nodes' bootmem to be initialized and have - * all reserved areas marked. - */ - NODE_DATA(nid) = careful_zallocation(nid, - sizeof(struct pglist_data), - SMP_CACHE_BYTES, end_pfn); - - dbg("node %d\n", nid); - dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; - NODE_DATA(nid)->node_start_pfn = start_pfn; - NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; - - if (NODE_DATA(nid)->node_spanned_pages == 0) - continue; - - dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); - dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); - - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_vaddr = careful_zallocation(nid, - bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end_pfn); - - dbg("bootmap_vaddr = %p\n", bootmem_vaddr); - - init_bootmem_node(NODE_DATA(nid), - __pa(bootmem_vaddr) >> PAGE_SHIFT, - start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - /* - * Be very careful about moving this around. Future - * calls to careful_zallocation() depend on this getting - * done correctly. - */ - mark_reserved_regions_for_nid(nid); + setup_node_data(nid, start_pfn, end_pfn); sparse_memory_present_with_active_regions(nid); } - init_bootmem_done = 1; - - /* - * Now bootmem is initialised we can create the node to cpumask - * lookup tables and setup the cpu callback to populate them. - */ setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cf11342bf519..d545b1231594 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -100,12 +100,11 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add { pte_t *pte; extern int mem_init_done; - extern void *early_get_page(void); if (mem_init_done) { pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); } else { - pte = (pte_t *)early_get_page(); + pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) clear_page(pte); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c8d709ab489d..cdb19ab859d3 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -75,11 +75,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) { void *pt; - if (init_bootmem_done) - pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); - else - pt = __va(memblock_alloc_base(size, size, - __pa(MAX_DMA_ADDRESS))); + pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); memset(pt, 0, size); return pt; diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index e996e007bc44..711f3d352af7 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -297,14 +297,13 @@ static void __init mpc512x_setup_diu(void) * and so negatively affect boot time. Instead we reserve the * already configured frame buffer area so that it won't be * destroyed. The starting address of the area to reserve and - * also it's length is passed to reserve_bootmem(). It will be + * also it's length is passed to memblock_reserve(). It will be * freed later on first open of fbdev, when splash image is not * needed any more. */ if (diu_shared_fb.in_use) { - ret = reserve_bootmem(diu_shared_fb.fb_phys, - diu_shared_fb.fb_len, - BOOTMEM_EXCLUSIVE); + ret = memblock_reserve(diu_shared_fb.fb_phys, + diu_shared_fb.fb_len); if (ret) { pr_err("%s: reserve bootmem failed\n", __func__); diu_shared_fb.in_use = false; From 14ed740957704e8768523899e0fa31972577bf65 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:34 +1000 Subject: [PATCH 030/101] powerpc: Remove some old bootmem related comments Now bootmem is gone from powerpc we can remove comments mentioning it. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 5 +---- arch/powerpc/kernel/rtas.c | 4 ++-- arch/powerpc/kvm/book3s_hv_builtin.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 4 ---- 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 099f27e6d1b0..6af05fc1dec9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -696,10 +696,7 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* - * Ensure that total memory size is page-aligned, because otherwise - * mark_bootmem() gets upset. - */ + /* Ensure that total memory size is page-aligned. */ limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8b4c857c1421..4af905e81ab0 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1091,8 +1091,8 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) } /* - * Call early during boot, before mem init or bootmem, to retrieve the RTAS - * informations from the device-tree and allocate the RMO buffer for userland + * Call early during boot, before mem init, to retrieve the RTAS + * information from the device-tree and allocate the RMO buffer for userland * accesses. */ void __init rtas_initialize(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 4fdc27c80f4c..e64868c0b8de 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(kvm_release_hpt); * kvm_cma_reserve() - reserve area for kvm hash pagetable * * This function reserves memory from early allocator. It should be - * called by arch specific code once the early allocator (memblock or bootmem) + * called by arch specific code once the memblock allocator * has been activated and all other subsystems have already allocated/reserved * memory. */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8aa04f03fd31..b460e723f0ec 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -276,7 +276,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { @@ -399,7 +399,7 @@ void __init reserve_hugetlb_gpages(void) #else /* !PPC_FSL_BOOK3E */ /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdb19ab859d3..aa9173745cf4 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -109,10 +109,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags) __pgprot(flags))); } else { #ifdef CONFIG_PPC_MMU_NOHASH - /* Warning ! This will blow up if bootmem is not initialized - * which our ppc64 code is keen to do that, we'll need to - * fix it and/or be more careful - */ pgdp = pgd_offset_k(ea); #ifdef PUD_TABLE_SIZE if (pgd_none(*pgdp)) { From 68cf0d642f62267b960f947370539ff3582c4935 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:35 +1000 Subject: [PATCH 031/101] powerpc: Remove superfluous bootmem includes Lots of places included bootmem.h even when not using bootmem. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/crash_dump.c | 1 - arch/powerpc/kernel/irq.c | 1 - arch/powerpc/kernel/pci_64.c | 1 - arch/powerpc/kernel/rtas_pci.c | 1 - arch/powerpc/kernel/setup_32.c | 1 - arch/powerpc/kernel/vdso.c | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 1 - arch/powerpc/mm/init_32.c | 1 - arch/powerpc/mm/init_64.c | 1 - arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/platforms/cell/celleb_scc_epci.c | 1 - arch/powerpc/platforms/cell/celleb_scc_pciex.c | 1 - arch/powerpc/platforms/maple/pci.c | 1 - arch/powerpc/platforms/powermac/pci.c | 1 - arch/powerpc/platforms/powernv/eeh-ioda.c | 1 - arch/powerpc/platforms/powernv/pci.c | 1 - arch/powerpc/sysdev/fsl_msi.c | 1 - arch/powerpc/sysdev/ipic.c | 1 - arch/powerpc/sysdev/mpic.c | 1 - arch/powerpc/sysdev/mpic_pasemi_msi.c | 1 - arch/powerpc/sysdev/mpic_u3msi.c | 1 - arch/powerpc/sysdev/ppc4xx_msi.c | 1 - arch/powerpc/sysdev/ppc4xx_pci.c | 1 - arch/powerpc/sysdev/qe_lib/qe.c | 1 - arch/powerpc/sysdev/qe_lib/qe_ic.c | 1 - arch/powerpc/sysdev/uic.c | 1 - 26 files changed, 1 insertion(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index c78e6dac4d7d..cfa0f81a5bb0 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -12,7 +12,6 @@ #undef DEBUG #include -#include #include #include #include diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8d87bb162ecd..45096033d37b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 155013da27e0..ba0f2d6fc27f 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 7c55b86206b3..ce230da2c015 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 88a36e64bf9b..ba3a3c3b8c66 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f174351842cf..305eb0d9b768 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index e64868c0b8de..3f1bb5a36c27 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 3b3100433c18..a10be665b645 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 3481556a1880..10471f9bb63f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index aa9173745cf4..e0c718543174 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -51,6 +50,7 @@ #include #include #include +#include #include "mmu_decl.h" diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c index 844c0facb4f7..9438bbed402f 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_epci.c +++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 4278acfa2ede..f22387598040 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index f7136aae8bbf..d3a13067ec42 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 7e868ccf3b0d..04702db35d45 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index eba9cb10619c..db3803e21483 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b2187d0068b8..ba7452708db3 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index de40b48b460e..8f37204bf94f 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -13,7 +13,6 @@ * */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index b50f97811c25..b28733727ed3 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 89cec0ed6a58..c4648ad5c1f3 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 15dccd35fa11..428de9d23120 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -16,7 +16,6 @@ #undef DEBUG #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 623d7fba15b4..4ce8f54f234c 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 22b5200636e7..a59f2890897a 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -22,7 +22,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c index df6e2fc4ff92..086aca69ecae 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.c +++ b/arch/powerpc/sysdev/ppc4xx_pci.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 238a07b97f2c..b584debbcd9c 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index b2b87c30e266..543765e1ef14 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 92033936a8f7..7c37157d4c24 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include From 21098b9e07476deb3f40acd7e51cffbffb4ef865 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:36 +1000 Subject: [PATCH 032/101] powerpc: Move sparse_init() into initmem_init We did part of sparse initialisation in setup_arch and part in initmem_init. Put them together. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/mm/mem.c | 1 + arch/powerpc/mm/numa.c | 2 ++ 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 91d9cfaffe6c..6e5310ddf8c7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -690,7 +690,6 @@ void __init setup_arch(char **cmdline_p) emergency_stack_init(); initmem_init(); - sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e076d19c095b..b7285a5870f8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -205,6 +205,7 @@ void __init initmem_init(void) /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); + sparse_init(); } /* mark pages that don't exist as nosave */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2e9ad2d76b75..417b0a523a47 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -966,6 +966,8 @@ void __init initmem_init(void) sparse_memory_present_with_active_regions(nid); } + sparse_init(); + setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); From 6e4c632cdff7bf0238a2543dfe98bd1ad40313c2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:37 +1000 Subject: [PATCH 033/101] powerpc: make __ffs return unsigned long I'm seeing a build warning in mm/nobootmem.c after removing bootmem: mm/nobootmem.c: In function '__free_pages_memory': include/linux/kernel.h:713:17: warning: comparison of distinct pointer types lacks a cast [enabled by default] (void) (&_min1 == &_min2); \ ^ mm/nobootmem.c:90:11: note: in expansion of macro 'min' order = min(MAX_ORDER - 1UL, __ffs(start)); ^ The rest of the worlds seems to define __ffs as returning unsigned long, so lets do that. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bitops.h | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index bd3bd573d0ae..c633f058ba43 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -213,7 +213,7 @@ static __inline__ unsigned long ffz(unsigned long x) return __ilog2(x & -x); } -static __inline__ int __ffs(unsigned long x) +static __inline__ unsigned long __ffs(unsigned long x) { return __ilog2(x & -x); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 65d2ed4549e6..d8484d7cffaa 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -152,7 +152,7 @@ static int setup_one_atmu(struct ccsr_pci __iomem *pci, flags |= 0x10000000; /* enable relaxed ordering */ for (i = 0; size > 0; i++) { - unsigned int bits = min(ilog2(size), + unsigned int bits = min_t(u32, ilog2(size), __ffs(pci_addr | phys_addr)); if (index + i >= 5) From 7d56c65a6ff9065c459fc63c509950d8ea66e00c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 17:07:03 +1000 Subject: [PATCH 034/101] powerpc/ftrace: Remove mod_return_to_handler mod_return_to_handler is the same as return_to_handler, except it handles the change of the TOC (r2). Add this into return_to_handler and remove mod_return_to_handler. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 24 +----------------------- arch/powerpc/kernel/ftrace.c | 14 ++------------ arch/powerpc/kernel/process.c | 9 +-------- 3 files changed, 4 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0905c8da90f1..66f7ed32848f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1240,28 +1240,6 @@ _GLOBAL(ftrace_graph_caller) blr _GLOBAL(return_to_handler) - /* need to save return values */ - std r4, -24(r1) - std r3, -16(r1) - std r31, -8(r1) - mr r31, r1 - stdu r1, -112(r1) - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - ld r1, 0(r1) - ld r4, -24(r1) - ld r3, -16(r1) - ld r31, -8(r1) - - /* Jump back to real return address */ - blr - -_GLOBAL(mod_return_to_handler) /* need to save return values */ std r4, -32(r1) std r3, -24(r1) @@ -1272,7 +1250,7 @@ _GLOBAL(mod_return_to_handler) stdu r1, -112(r1) /* - * We are in a module using the module's TOC. + * We might be called from a module. * Switch to our TOC to run inside the core kernel. */ ld r2, PACATOC(r13) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 390311c0f03d..abf79213c356 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -510,10 +510,6 @@ int ftrace_disable_ftrace_graph_caller(void) } #endif /* CONFIG_DYNAMIC_FTRACE */ -#ifdef CONFIG_PPC64 -extern void mod_return_to_handler(void); -#endif - /* * Hook the return address and push it in the stack of return addrs * in current thread info. @@ -523,7 +519,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) unsigned long old; int faulted; struct ftrace_graph_ent trace; - unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) return; @@ -531,13 +527,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; -#ifdef CONFIG_PPC64 - /* non core kernel code needs to save and restore the TOC */ - if (REGION_ID(self_addr) != KERNEL_REGION_ID) - return_hooker = (unsigned long)&mod_return_to_handler; -#endif - - return_hooker = ppc_function_entry((void *)return_hooker); + return_hooker = ppc_function_entry(return_to_handler); /* * Protect against fault, even if it shouldn't diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8c2691e445bd..f6b82152e7aa 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1527,13 +1527,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) int curr_frame = current->curr_ret_stack; extern void return_to_handler(void); unsigned long rth = (unsigned long)return_to_handler; - unsigned long mrth = -1; -#ifdef CONFIG_PPC64 - extern void mod_return_to_handler(void); - rth = *(unsigned long *)rth; - mrth = (unsigned long)mod_return_to_handler; - mrth = *(unsigned long *)mrth; -#endif #endif sp = (unsigned long) stack; @@ -1558,7 +1551,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) if (!firstframe || ip != lr) { printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if ((ip == rth || ip == mrth) && curr_frame >= 0) { + if ((ip == rth) && curr_frame >= 0) { printk(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; From b3c18725a0eb7ea2458e9ae3b7e5a477f52e361f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 17:07:04 +1000 Subject: [PATCH 035/101] powerpc/ftrace: simplify prepare_ftrace_return Instead of passing in the stack address of the link register to be modified, just pass in the old value and return the new value and rely on ftrace_graph_caller to do the modification. This removes the exception handling around the stack update - it isn't needed and we weren't consistent about it. Later on we would do an unprotected modification: if (!ftrace_graph_entry(&trace)) { *parent = old; Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_32.S | 10 ++++-- arch/powerpc/kernel/entry_64.S | 11 +++++-- arch/powerpc/kernel/ftrace.c | 59 ++++++++-------------------------- 3 files changed, 30 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 22b45a4955cd..ad837d8a26d4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1424,12 +1424,18 @@ _GLOBAL(ftrace_graph_caller) lwz r4, 44(r1) subi r4, r4, MCOUNT_INSN_SIZE - /* get the parent address */ - addi r3, r1, 52 + /* Grab the LR out of the caller stack frame */ + lwz r3,52(r1) bl prepare_ftrace_return nop + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + stw r3,52(r1) + MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 66f7ed32848f..194e46dcf08d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1227,13 +1227,20 @@ _GLOBAL(ftrace_graph_caller) ld r4, 128(r1) subi r4, r4, MCOUNT_INSN_SIZE - /* get the parent address */ + /* Grab the LR out of the caller stack frame */ ld r11, 112(r1) - addi r3, r11, 16 + ld r3, 16(r11) bl prepare_ftrace_return nop + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + ld r11, 112(r1) + std r3, 16(r11) + ld r0, 128(r1) mtlr r0 addi r1, r1, 112 diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index abf79213c356..d7950317d3a1 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -512,67 +512,34 @@ int ftrace_disable_ftrace_graph_caller(void) /* * Hook the return address and push it in the stack of return addrs - * in current thread info. + * in current thread info. Return the address we want to divert to. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - unsigned long old; - int faulted; struct ftrace_graph_ent trace; unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) - return; + goto out; if (unlikely(atomic_read(¤t->tracing_graph_pause))) - return; + goto out; return_hooker = ppc_function_entry(return_to_handler); - /* - * Protect against fault, even if it shouldn't - * happen. This tool is too much intrusive to - * ignore such a protection. - */ - asm volatile( - "1: " PPC_LL "%[old], 0(%[parent])\n" - "2: " PPC_STL "%[return_hooker], 0(%[parent])\n" - " li %[faulted], 0\n" - "3:\n" - - ".section .fixup, \"ax\"\n" - "4: li %[faulted], 1\n" - " b 3b\n" - ".previous\n" - - ".section __ex_table,\"a\"\n" - PPC_LONG_ALIGN "\n" - PPC_LONG "1b,4b\n" - PPC_LONG "2b,4b\n" - ".previous" - - : [old] "=&r" (old), [faulted] "=r" (faulted) - : [parent] "r" (parent), [return_hooker] "r" (return_hooker) - : "memory" - ); - - if (unlikely(faulted)) { - ftrace_graph_stop(); - WARN_ON(1); - return; - } - - trace.func = self_addr; + trace.func = ip; trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - *parent = old; - return; - } + if (!ftrace_graph_entry(&trace)) + goto out; - if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) - *parent = old; + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + goto out; + + parent = return_hooker; +out: + return parent; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From dedd24a12fe6735898feeb06184ee346907abb5d Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sat, 25 May 2013 12:54:25 -0400 Subject: [PATCH 036/101] powerpc: Remove unused devm_ioremap_prot() Added in 2008, but has never had any in-tree users, and no other architectures provide it. Signed-off-by: Kyle McMartin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/io.h | 3 --- arch/powerpc/lib/Makefile | 1 - arch/powerpc/lib/devres.c | 43 ----------------------------------- 3 files changed, 47 deletions(-) delete mode 100644 arch/powerpc/lib/devres.c diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 97d3869991ca..5f0ad8904f65 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -851,9 +851,6 @@ static inline void * bus_to_virt(unsigned long address) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) -void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, - size_t size, unsigned long flags); - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IO_H */ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9f342f134ae4..597562f69b2d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -12,7 +12,6 @@ CFLAGS_REMOVE_feature-fixups.o = -pg obj-y := string.o alloc.o \ crtsavres.o ppc_ksyms.o obj-$(CONFIG_PPC32) += div64.o copy_32.o -obj-$(CONFIG_HAS_IOMEM) += devres.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ usercopy_64.o mem_64.o string.o \ diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c deleted file mode 100644 index 8df55fc3aad6..000000000000 --- a/arch/powerpc/lib/devres.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2008 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include /* devres_*(), devm_ioremap_release() */ -#include -#include /* ioremap_prot() */ -#include /* EXPORT_SYMBOL() */ - -/** - * devm_ioremap_prot - Managed ioremap_prot() - * @dev: Generic device to remap IO address for - * @offset: BUS offset to map - * @size: Size of map - * @flags: Page flags - * - * Managed ioremap_prot(). Map is automatically unmapped on driver - * detach. - */ -void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, - size_t size, unsigned long flags) -{ - void __iomem **ptr, *addr; - - ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return NULL; - - addr = ioremap_prot(offset, size, flags); - if (addr) { - *ptr = addr; - devres_add(dev, ptr); - } else - devres_free(ptr); - - return addr; -} -EXPORT_SYMBOL(devm_ioremap_prot); From 60878dfb11d47a9029581543ee2d2101faa88072 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 29 Apr 2014 09:24:06 +0200 Subject: [PATCH 037/101] powerpc/ftrace: Fix obsolete comment CONFIG_MCOUNT is not defined anymore, the corresponding #ifdef there is CONFIG_FUNCTION_TRACER. Signed-off-by: Jiri Slaby Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ad837d8a26d4..10a093579191 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1463,4 +1463,4 @@ _GLOBAL(return_to_handler) blr #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#endif /* CONFIG_MCOUNT */ +#endif /* CONFIG_FUNCTION_TRACER */ From 66f3d4fe0ba8f38df6141cc0b6ec145be6e63f0e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 23 Oct 2014 16:35:14 +1100 Subject: [PATCH 038/101] powerpc: Remove CPU_FTR_HVMODE from CPU_FTRS_ALWAYS We potentially clear CPU_FTR_HVMODE at runtime in __init_hvmode_206(), so we must make sure it's not set in CPU_FTRS_ALWAYS. This doesn't hurt us in practice at the moment, because we don't support compiling only for CPUs that support CPU_FTR_HVMODE. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index daa5af91163c..42d8b33d1623 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -511,7 +511,7 @@ enum { (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ - CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE) + CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE) #endif #else enum { From 90029640fd5963343fb862d419db161bc0424120 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Aug 2014 18:26:28 +1000 Subject: [PATCH 039/101] powerpc: Remove unused CPU_FTRS_A2 In commit fb5a515704d7 "Remove platforms/wsp and associated pieces" we removed the last user of CPU_FTRS_A2, so we should remove it too. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 42d8b33d1623..22d5a7da9e68 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -448,13 +448,9 @@ extern const char *powerpc_base_platform; CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) #define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) -#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \ - CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | \ - CPU_FTR_ICSWX | CPU_FTR_DABRX ) - #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2) +#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ @@ -505,7 +501,7 @@ enum { #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2) +#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500) #else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ From 4b4b13d5fec8a82ed2780c487e49cfc4321a8c14 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Tue, 28 Oct 2014 12:19:00 +0100 Subject: [PATCH 040/101] powerpc/boot: Parse chosen/cmdline-timeout parameter On some platforms a 5 second timeout during boot might be quite long, so make it configurable. Run the loop at least once to let the user stop the boot by holding a key pressed. If the timeout is set to 0, don't wait for input, which can be used as a workaround if the boot hangs on random data coming in on the serial port. Signed-off-by: Simon Kagstrom [mpe: Changelog wording & whitespace] Signed-off-by: Michael Ellerman --- arch/powerpc/boot/main.c | 15 +++++++++++++-- arch/powerpc/boot/ops.h | 2 +- arch/powerpc/boot/serial.c | 6 +++--- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index d367a0aece2a..d80161b633f4 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -144,13 +144,24 @@ static char cmdline[BOOT_COMMAND_LINE_SIZE] static void prep_cmdline(void *chosen) { + unsigned int getline_timeout = 5000; + int v; + int n; + + /* Wait-for-input time */ + n = getprop(chosen, "linux,cmdline-timeout", &v, sizeof(v)); + if (n == sizeof(v)) + getline_timeout = v; + if (cmdline[0] == '\0') getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1); printf("\n\rLinux/PowerPC load: %s", cmdline); + /* If possible, edit the command line */ - if (console_ops.edit_cmdline) - console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE); + if (console_ops.edit_cmdline && getline_timeout) + console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE, getline_timeout); + printf("\n\r"); /* Put the command line back into the devtree for the kernel */ diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index 8aad3c55aeda..5e75e1c5518e 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -58,7 +58,7 @@ extern struct dt_ops dt_ops; struct console_ops { int (*open)(void); void (*write)(const char *buf, int len); - void (*edit_cmdline)(char *buf, int len); + void (*edit_cmdline)(char *buf, int len, unsigned int getline_timeout); void (*close)(void); void *data; }; diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index f2156f07571f..167ee9433de6 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -33,7 +33,7 @@ static void serial_write(const char *buf, int len) scdp->putc(*buf++); } -static void serial_edit_cmdline(char *buf, int len) +static void serial_edit_cmdline(char *buf, int len, unsigned int timeout) { int timer = 0, count; char ch, *cp; @@ -44,7 +44,7 @@ static void serial_edit_cmdline(char *buf, int len) cp = &buf[count]; count++; - while (timer++ < 5*1000) { + do { if (scdp->tstc()) { while (((ch = scdp->getc()) != '\n') && (ch != '\r')) { /* Test for backspace/delete */ @@ -70,7 +70,7 @@ static void serial_edit_cmdline(char *buf, int len) break; /* Exit 'timer' loop */ } udelay(1000); /* 1 msec */ - } + } while (timer++ < timeout); *cp = 0; } From 1bc9e47aa8e4b0f9629e218d4c7667308ca6af67 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 30 Oct 2014 15:43:43 +1100 Subject: [PATCH 041/101] powerpc/jump_label: Use HAVE_JUMP_LABEL Commit d4fe0965e208 ("powerpc/jump_label: use HAVE_JUMP_LABEL?") missed a few conversions. Change the remaining uses of CONFIG_JUMP_LABEL to HAVE_JUMP_LABEL. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-wrappers.S | 2 +- arch/powerpc/platforms/pseries/hvCall.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index feb549aa3eea..cf7266de4c2f 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -18,7 +18,7 @@ .section ".text" #ifdef CONFIG_TRACEPOINTS -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL #define OPAL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) #else diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 3fda3f17b84e..ccd53f91e8aa 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -18,7 +18,7 @@ #ifdef CONFIG_TRACEPOINTS -#ifndef CONFIG_JUMP_LABEL +#ifndef HAVE_JUMP_LABEL .section ".toc","aw" .globl hcall_tracepoint_refcount @@ -78,7 +78,7 @@ hcall_tracepoint_refcount: mr r5,BUFREG; \ __HCALL_INST_POSTCALL -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL #define HCALL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) #else From e88f157de5e0b327b1b26e3260de4b16c07e2d19 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 27 Oct 2014 20:56:14 +1100 Subject: [PATCH 042/101] powerpc: Remove unused vgacon_remap_base & fix build break The build is broken with CONFIG_PPC32=y, CONFIG_FB_VGA16=y and CONFIG_VGA_CONSOLE=n. The problem is that vgacon_remap_base is not defined. It's used in: #define VGA_MAP_MEM(x,s) (x + vgacon_remap_base) Which is used in the vga16fb.c code. Digging down it seems vgacon_remap_base is never initialised. It used to be, back in arch/ppc (pplus.c and prep_setup.c), but none of that code ever made it to arch/powerpc. So given it's been unused for >6 years, remove it. Whether vga16fb.c works on 32-bit is another question, but this patch shouldn't affect it. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vga.h | 4 +--- arch/powerpc/kernel/setup_32.c | 5 ----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h index a2eac409c1ec..e5f8dd366212 100644 --- a/arch/powerpc/include/asm/vga.h +++ b/arch/powerpc/include/asm/vga.h @@ -38,12 +38,10 @@ static inline u16 scr_readw(volatile const u16 *addr) #endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */ -extern unsigned long vgacon_remap_base; - #ifdef __powerpc64__ #define VGA_MAP_MEM(x,s) ((unsigned long) ioremap((x), s)) #else -#define VGA_MAP_MEM(x,s) (x + vgacon_remap_base) +#define VGA_MAP_MEM(x,s) (x) #endif #define vga_readb(x) (*(x)) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ba3a3c3b8c66..bb02e9f6944e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -52,11 +52,6 @@ unsigned long ISA_DMA_THRESHOLD; unsigned int DMA_MODE_READ; unsigned int DMA_MODE_WRITE; -#ifdef CONFIG_VGA_CONSOLE -unsigned long vgacon_remap_base; -EXPORT_SYMBOL(vgacon_remap_base); -#endif - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. From a3e5b356b3ab40b00ecb0f6fde445db719c100b5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Oct 2014 14:47:25 +1100 Subject: [PATCH 043/101] powerpc: Don't use local named register variable in current_thread_info LLVM doesn't support local named register variables and is unlikely to. current_thread_info is using one, fix it by moving it out and calling it __current_r1(). I gave it a bit of an obscure name because we don't want anyone else using it - they should use current_stack_pointer(). This specific case is performance critical and we can't afford to call a function to get it. Furthermore it isn't important to know exactly where in the stack we are since we mask the lower bits. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/thread_info.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index b034ecdb7c74..ebc4f165690a 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -71,13 +71,12 @@ struct thread_info { #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) /* how to get the thread information struct from C */ +register unsigned long __current_r1 asm("r1"); static inline struct thread_info *current_thread_info(void) { - register unsigned long sp asm("r1"); - /* gcc4, at least, is smart enough to turn this into a single * rlwinm for ppc32 and clrrdi for ppc64 */ - return (struct thread_info *)(sp & ~(THREAD_SIZE-1)); + return (struct thread_info *)(__current_r1 & ~(THREAD_SIZE-1)); } #endif /* __ASSEMBLY__ */ From 6f791bef76d66923bc250cbbf4ddbf3f1c944686 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Oct 2014 14:47:26 +1100 Subject: [PATCH 044/101] powerpc: Remove double braces in alignment code. Looks like I introduced this when adding LE support. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/align.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 34f55524d456..86150fbb42c3 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -908,7 +908,7 @@ int fix_alignment(struct pt_regs *regs) flush_fp_to_thread(current); } - if ((nb == 16)) { + if (nb == 16) { if (flags & F) { /* Special case for 16-byte FP loads and stores */ PPC_WARN_ALIGNMENT(fp_pair, regs); From ecaf5fa0a5096a237482ddc4b3c46becf531db1b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Oct 2014 14:47:27 +1100 Subject: [PATCH 045/101] powerpc: LLVM complains about forward declaration of struct rtas_sensors Move the declaration up to silence the warning. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas-proc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 8777fb02349f..fb2fb3ea85e5 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -113,17 +113,6 @@ #define SENSOR_PREFIX "ibm,sensor-" #define cel_to_fahr(x) ((x*9/5)+32) - -/* Globals */ -static struct rtas_sensors sensors; -static struct device_node *rtas_node = NULL; -static unsigned long power_on_time = 0; /* Save the time the user set */ -static char progress_led[MAX_LINELENGTH]; - -static unsigned long rtas_tone_frequency = 1000; -static unsigned long rtas_tone_volume = 0; - -/* ****************STRUCTS******************************************* */ struct individual_sensor { unsigned int token; unsigned int quant; @@ -134,6 +123,15 @@ struct rtas_sensors { unsigned int quant; }; +/* Globals */ +static struct rtas_sensors sensors; +static struct device_node *rtas_node = NULL; +static unsigned long power_on_time = 0; /* Save the time the user set */ +static char progress_led[MAX_LINELENGTH]; + +static unsigned long rtas_tone_frequency = 1000; +static unsigned long rtas_tone_volume = 0; + /* ****************************************************************** */ /* Declarations */ static int ppc_rtas_sensors_show(struct seq_file *m, void *v); From 7aa189c8f57f2141b8655c2a13c7486d0844d490 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 3 Nov 2014 08:18:06 +1100 Subject: [PATCH 046/101] powerpc/pseries: Quieten ibm,pcie-link-speed-stats warning The ibm,pcie-link-speed-stats isn't mandatory, so we shouldn't print a high priority error message when missing. One example where we see this is QEMU. Reduce it to pr_debug. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 67e48594040c..fe16a50700de 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -134,7 +134,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) of_node_put(pdn); if (rc) { - pr_err("no ibm,pcie-link-speed-stats property\n"); + pr_debug("no ibm,pcie-link-speed-stats property\n"); return 0; } From 20f1aae6cb0120fa54c0fe81614f6983df3b420a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 3 Nov 2014 08:34:01 +1100 Subject: [PATCH 047/101] powerpc/pseries: Quieten relocation on exceptions warning The H_SET_MODE hcall returns H_P2 if a function is not implemented and all callers should handle this case. The call to enable relocation on exceptions currently prints an error message if the feature is not implemented. While H_SET_MODE was first introduced on POWER8 (which has relocation on exceptions), it has been now added on some POWER7 configurations (which does not). Check for H_P2 and print an informational message instead. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/setup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index db0fc0c07568..8b8fb19a4097 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -499,7 +499,11 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_SET_MODE)) { long rc; - if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) { + + rc = pSeries_enable_reloc_on_exc(); + if (rc == H_P2) { + pr_info("Relocation on exceptions not supported\n"); + } else if (rc != H_SUCCESS) { pr_warn("Unable to enable relocation on exceptions: " "%ld\n", rc); } From 0f9da5cb746581ae2de120c78a8afe29fb594fe4 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:44 +0200 Subject: [PATCH 048/101] powerpc/4xx/cpm: delete unneeded test before of_node_put Simplify the error path to avoid calling of_node_put when it is not needed. Signed-off-by: Julia Lawall Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_cpm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/sysdev/ppc4xx_cpm.c index 82e2cfe35c62..ba95adf81d8d 100644 --- a/arch/powerpc/sysdev/ppc4xx_cpm.c +++ b/arch/powerpc/sysdev/ppc4xx_cpm.c @@ -281,7 +281,7 @@ static int __init cpm_init(void) printk(KERN_ERR "cpm: could not parse dcr property for %s\n", np->full_name); ret = -EINVAL; - goto out; + goto node_put; } cpm.dcr_host = dcr_map(np, dcr_base, dcr_len); @@ -290,7 +290,7 @@ static int __init cpm_init(void) printk(KERN_ERR "cpm: failed to map dcr property for %s\n", np->full_name); ret = -EINVAL; - goto out; + goto node_put; } /* All 4xx SoCs with a CPM controller have one of two @@ -330,9 +330,9 @@ static int __init cpm_init(void) if (cpm.standby || cpm.suspend) suspend_set_ops(&cpm_suspend_ops); +node_put: + of_node_put(np); out: - if (np) - of_node_put(np); return ret; } From 4cdd641dddd0ecd93f7b84fc3622df83937f6094 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:45 +0200 Subject: [PATCH 049/101] powerpc/fsl: fsl_soc: delete unneeded test before of_node_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of_node_put supports NULL as its argument, so the initial test is not necessary. Suggested by Uwe Kleine-König. The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e; @@ -if (e) of_node_put(e); // Signed-off-by: Julia Lawall Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/fsl_soc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 1e04568da3b9..99269c041615 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -197,8 +197,7 @@ static int __init setup_rstcr(void) if (!rstcr && ppc_md.restart == fsl_rstcr_restart) printk(KERN_ERR "No RSTCR register, warm reboot won't work\n"); - if (np) - of_node_put(np); + of_node_put(np); return 0; } From 756d36790ad0fa221af650095fa52764474854b7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:46 +0200 Subject: [PATCH 050/101] powerpc/mpc5xxx: delete unneeded test before of_node_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of_node_put supports NULL as its argument, so the initial test is not necessary. Suggested by Uwe Kleine-König. The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e; @@ -if (e) of_node_put(e); // Signed-off-by: Julia Lawall Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/mpc5xxx_clocks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c index 5492dc5f56f4..f4f0301b9a60 100644 --- a/arch/powerpc/sysdev/mpc5xxx_clocks.c +++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c @@ -26,8 +26,7 @@ unsigned long mpc5xxx_get_bus_frequency(struct device_node *node) of_node_put(node); node = np; } - if (node) - of_node_put(node); + of_node_put(node); return p_bus_freq ? *p_bus_freq : 0; } From 498b65147247f61f3f7f4deb35484ff98556e798 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:47 +0200 Subject: [PATCH 051/101] powerpc/pseries: delete unneeded test before of_node_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of_node_put supports NULL as its argument, so the initial test is not necessary. Suggested by Uwe Kleine-König. The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e; @@ -if (e) of_node_put(e); // Signed-off-by: Julia Lawall Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 49bcf7c09c49..1cc3db95ee71 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -574,8 +574,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) while (isa_dn && isa_dn != dn) isa_dn = isa_dn->parent; - if (isa_dn_orig) - of_node_put(isa_dn_orig); + of_node_put(isa_dn_orig); /* Count number of direct PCI children of the PHB. */ for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) From 6609ed14de75bde7a99b33e9be9f1873da91f07d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:48 +0200 Subject: [PATCH 052/101] powerpc/gamecube/wii: delete unneeded test before of_node_put Simplify the error path to avoid calling of_node_put when it is not needed. Signed-off-by: Julia Lawall Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c index 20a8ed91962e..7feb325b636b 100644 --- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -247,7 +247,7 @@ void __init ug_udbg_init(void) np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-exi"); if (!np) { udbg_printf("%s: EXI node not found\n", __func__); - goto done; + goto out; } exi_io_base = ug_udbg_setup_exi_io_base(np); @@ -267,8 +267,8 @@ void __init ug_udbg_init(void) } done: - if (np) - of_node_put(np); + of_node_put(np); +out: return; } From cd32e2dcc9de6c27ecbbfc0e2079fb64b42bad5f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Nov 2014 09:12:28 +1100 Subject: [PATCH 053/101] powerpc: Fix bad NULL pointer check in udbg_uart_getc_poll() We have some code in udbg_uart_getc_poll() that tries to protect against a NULL udbg_uart_in, but gets it all wrong. Found with the LLVM static analyzer (scan-build). Fixes: 309257484cc1 ("powerpc: Cleanup udbg_16550 and add support for LPC PIO-only UARTs") Signed-off-by: Anton Blanchard [mpe: Add some newlines for readability while we're here] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/udbg_16550.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 6e7c4923b5ea..411116c38da4 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -69,8 +69,12 @@ static void udbg_uart_putc(char c) static int udbg_uart_getc_poll(void) { - if (!udbg_uart_in || !(udbg_uart_in(UART_LSR) & LSR_DR)) + if (!udbg_uart_in) + return -1; + + if (!(udbg_uart_in(UART_LSR) & LSR_DR)) return udbg_uart_in(UART_RBR); + return -1; } From d1d5304fcbc519f5fb12cbcca9c4fdd1d94447d7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:29:28 +1100 Subject: [PATCH 054/101] powerpc/mm: Use PAGE_FACTOR PAGE_FACTOR was defined to reflect the difference between configured page size and fixed 4KB page size. Replace (PAGE_SHIFT - HW_PAGE_SHIFT) with PAGE_FACTOR. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_low_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 057cbbb4c576..5094f32b706e 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -514,7 +514,7 @@ htab_insert_pte: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- htab_special_pfn - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 htab_special_pfn: sldi r5,r5,HW_PAGE_SHIFT @@ -544,7 +544,7 @@ htab_call_hpte_insert1: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- 3f - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 3: sldi r5,r5,HW_PAGE_SHIFT From 8b91a2554610ac5e341de8fb0b5715fe90a0f2e2 Mon Sep 17 00:00:00 2001 From: "Suresh E. Warrier" Date: Mon, 3 Nov 2014 15:46:42 +1100 Subject: [PATCH 055/101] powerpc: Save/restore PPR for KVM hypercalls The system call FLIH (first-level interrupt handler) at 0xc00 unconditionally sets hardware priority to medium. For hypercalls, this means we lose guest OS priority. The front end (do_kvm_0x**) to the KVM interrupt handler always assumes that PPR priority is saved in PACA exception save area, so it copies this to the kvm_hstate structure. For hypercalls, this would be the saved priority from any previous exception. Eventually, the guest gets resumed with an incorrect priority. The fix is to save the PPR priority in PACA exception save area before switching HMT priorities in the FLIH so that existing code described above in the KVM interrupt handler can copy it from there into the VCPU's saved context. Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras [mpe: Dropped HMT_MEDIUM_PPR_DISCARD and reworded comment] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 72e783ea0681..a1d45c161e24 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -292,15 +292,26 @@ decrementer_pSeries: . = 0xc00 .globl system_call_pSeries system_call_pSeries: - HMT_MEDIUM + /* + * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems + * that support it) before changing to HMT_MEDIUM. That allows the KVM + * code to save that value into the guest state (it is the guest's PPR + * value). Otherwise just change to HMT_MEDIUM as userspace has + * already saved the PPR. + */ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER SET_SCRATCH0(r13) GET_PACA(r13) std r9,PACA_EXGEN+EX_R9(r13) + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); + HMT_MEDIUM; std r10,PACA_EXGEN+EX_R10(r13) + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); mfcr r9 KVMTEST(0xc00) GET_SCRATCH0(r13) +#else + HMT_MEDIUM; #endif SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_RFID From 7048c846949ac1feb09d1b624ea0e8c351d92a7b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 3 Nov 2014 15:46:43 +1100 Subject: [PATCH 056/101] powerpc: Fix compilation of emulate_step() Commit be96f63375a1 ("powerpc: Split out instruction analysis part of emulate_step()") added some calls to do_fp_load() and do_fp_store(), which fail to compile on configs with CONFIG_PPC_FPU=n and CONFIG_PPC_EMULATE_SSTEP=y. This fixes the compile by adding #ifdef CONFIG_PPC_FPU around the code that calls these functions. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 54651fc2d412..dc885b30f7a6 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1865,6 +1865,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) } goto ldst_done; +#ifdef CONFIG_PPC_FPU case LOAD_FP: if (regs->msr & MSR_LE) return 0; @@ -1873,7 +1874,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) else err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); goto ldst_done; - +#endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: if (regs->msr & MSR_LE) @@ -1919,6 +1920,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) err = write_mem(op.val, op.ea, size, regs); goto ldst_done; +#ifdef CONFIG_PPC_FPU case STORE_FP: if (regs->msr & MSR_LE) return 0; @@ -1927,7 +1929,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) else err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); goto ldst_done; - +#endif #ifdef CONFIG_ALTIVEC case STORE_VMX: if (regs->msr & MSR_LE) From 608b286d1ddf38a7ceb624d2b689af095816d91c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 6 Nov 2014 11:38:27 +0800 Subject: [PATCH 057/101] powerpc/powernv: Add OPAL IPMI interface Recent OPAL firmare adds a couple of functions to send and receive IPMI messages: https://github.com/open-power/skiboot/commit/b2a374da This change updates the token list and wrappers to suit, and adds the platform devices for any IPMI interfaces. Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 17 +++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 2 ++ arch/powerpc/platforms/powernv/opal.c | 14 ++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9124b0ede1fc..5d073e50cac8 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -154,6 +154,8 @@ struct opal_sg_list { #define OPAL_HANDLE_HMI 98 #define OPAL_REGISTER_DUMP_REGION 101 #define OPAL_UNREGISTER_DUMP_REGION 102 +#define OPAL_IPMI_SEND 107 +#define OPAL_IPMI_RECV 108 #ifndef __ASSEMBLY__ @@ -452,6 +454,17 @@ struct opal_msg { __be64 params[8]; }; +enum { + OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1, +}; + +struct opal_ipmi_msg { + uint8_t version; + uint8_t netfn; + uint8_t cmd; + uint8_t data[]; +}; + struct opal_machine_check_event { enum OpalMCE_Version version:8; /* 0x00 */ uint8_t in_use; /* 0x01 */ @@ -963,6 +976,10 @@ int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); +int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, + uint64_t msg_len); +int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, + uint64_t *msg_len); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index feb549aa3eea..4f4e4009c289 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -250,3 +250,5 @@ OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); +OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); +OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d019b081df9d..0297702e8ae9 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -644,6 +644,16 @@ static void __init opal_dump_region_init(void) pr_warn("DUMP: Failed to register kernel log buffer. " "rc = %d\n", rc); } + +static void opal_ipmi_init(struct device_node *opal_node) +{ + struct device_node *np; + + for_each_child_of_node(opal_node, np) + if (of_device_is_compatible(np, "ibm,opal-ipmi")) + of_platform_device_create(np, NULL, NULL); +} + static int __init opal_init(void) { struct device_node *np, *consoles; @@ -707,6 +717,8 @@ static int __init opal_init(void) opal_msglog_init(); } + opal_ipmi_init(opal_node); + return 0; } machine_subsys_initcall(powernv, opal_init); @@ -742,6 +754,8 @@ void opal_shutdown(void) /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_ipmi_send); +EXPORT_SYMBOL_GPL(opal_ipmi_recv); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, From e7a7a65ed2e90c41341aafef9458d0aa8143a558 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 11 Nov 2014 12:50:22 +0800 Subject: [PATCH 058/101] powerpc: Fix comment typos in arch/powerpc/include/asm/bitops.h In arch/powerpc/include/asm/bitops.h, the comments about bit numbers in large (> 1 word) bitmaps have two typos: - On ppc64 system, the LSB of the 4th word should be bit 192 rather than 196, because if it's bit 196, bit 192-195 will be missing in the bitmap. - On ppc32 system, the LSB of the second word should be bit 32 rather than 31, because bit 31 is already in the first word. This patch fixes these typos. Signed-off-by: Boqun Feng Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index c633f058ba43..59abc620f8e8 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -14,9 +14,9 @@ * * The bitop functions are defined to work on unsigned longs, so for a * ppc64 system the bits end up numbered: - * |63..............0|127............64|191...........128|255...........196| + * |63..............0|127............64|191...........128|255...........192| * and on ppc32: - * |31.....0|63....31|95....64|127...96|159..128|191..160|223..192|255..224| + * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224| * * There are a few little-endian macros used mostly for filesystem * bitmaps, these work on similar bit arrays layouts, but From d8ee6f34fdd28ade6b7b3a9be175bc5c0c3cfb38 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 12 Nov 2014 16:54:54 +1100 Subject: [PATCH 059/101] powerpc/xmon: Fix build when 4xx=y and 44x=n dump_tlb_44x() is only defined when 44x=y, but the ifdef in xmon.c checks for 4xx, leading to a build failure: arch/powerpc/xmon/xmon.c:912:4: error: implicit declaration of function 'dump_tlb_44x' Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 506d25681fe3..ef18021d52e1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -907,7 +907,7 @@ cmds(struct pt_regs *excp) case 'u': dump_segments(); break; -#elif defined(CONFIG_4xx) +#elif defined(CONFIG_44x) case 'u': dump_tlb_44x(); break; From 31ceb157f294843563330658c3ad6e3b5a1c4fe2 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 5 Nov 2014 09:18:51 -0600 Subject: [PATCH 060/101] dt/bindings: Introduce the FSL QorIQ DPAA BMan The Buffer Manager is part of the Data-Path Acceleration Architecture (DPAA). BMan supports hardware allocation and deallocation of buffers belonging to pools originally created by software with configurable depletion thresholds. This binding covers the CCSR space programming model Signed-off-by: Emil Medve Change-Id: I3ec479bfb3c91951e96902f091f5d7d2adbef3b2 Signed-off-by: Scott Wood --- .../devicetree/bindings/soc/fsl/bman.txt | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 Documentation/devicetree/bindings/soc/fsl/bman.txt diff --git a/Documentation/devicetree/bindings/soc/fsl/bman.txt b/Documentation/devicetree/bindings/soc/fsl/bman.txt new file mode 100644 index 000000000000..9f80bf8709ac --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/bman.txt @@ -0,0 +1,125 @@ +QorIQ DPAA Buffer Manager Device Tree Bindings + +Copyright (C) 2008 - 2014 Freescale Semiconductor Inc. + +CONTENTS + + - BMan Node + - BMan Private Memory Node + - Example + +BMan Node + +The Buffer Manager is part of the Data-Path Acceleration Architecture (DPAA). +BMan supports hardware allocation and deallocation of buffers belonging to pools +originally created by software with configurable depletion thresholds. This +binding covers the CCSR space programming model + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,bman" + May include "fsl,-bman" + +- reg + Usage: Required + Value type: + Definition: Registers region within the CCSR address space + +The BMan revision information is located in the BMAN_IP_REV_1/2 registers which +are located at offsets 0xbf8 and 0xbfc + +- interrupts + Usage: Required + Value type: + Definition: Standard property. The error interrupt + +- fsl,liodn + Usage: See pamu.txt + Value type: + Definition: PAMU property used for static LIODN assignment + +- fsl,iommu-parent + Usage: See pamu.txt + Value type: + Definition: PAMU property used for dynamic LIODN assignment + + For additional details about the PAMU/LIODN binding(s) see pamu.txt + +Devices connected to a BMan instance via Direct Connect Portals (DCP) must link +to the respective BMan instance + +- fsl,bman + Usage: Required + Value type: + Description: List of phandle and DCP index pairs, to the BMan instance + to which this device is connected via the DCP + +BMan Private Memory Node + +BMan requires a contiguous range of physical memory used for the backing store +for BMan Free Buffer Proxy Records (FBPR). This memory is reserved/allocated as a +node under the /reserved-memory node + +The BMan FBPR memory node must be named "bman-fbpr" + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must inclide "fsl,bman-fbpr" + +The following constraints are relevant to the FBPR private memory: + - The size must be 2^(size + 1), with size = 11..33. That is 4 KiB to + 16 GiB + - The alignment must be a muliptle of the memory size + +The size of the FBPR must be chosen by observing the hardware features configured +via the Reset Configuration Word (RCW) and that are relevant to a specific board +(e.g. number of MAC(s) pinned-out, number of offline/host command FMan ports, +etc.). The size configured in the DT must reflect the hardware capabilities and +not the specific needs of an application + +For additional details about reserved memory regions see reserved-memory.txt + +EXAMPLE + +The example below shows a BMan FBPR dynamic allocation memory node + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + compatible = "fsl,bman-fbpr"; + alloc-ranges = <0 0 0xf 0xffffffff>; + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + }; + +The example below shows a (P4080) BMan CCSR-space node + + crypto@300000 { + ... + fsl,bman = <&bman, 2>; + ... + }; + + bman: bman@31a000 { + compatible = "fsl,bman"; + reg = <0x31a000 0x1000>; + interrupts = <16 2 1 2>; + fsl,liodn = <0x17>; + memory-region = <&bman_fbpr>; + }; + + fman@400000 { + ... + fsl,bman = <&bman, 0>; + ... + }; From 5f3af4008bfcb388a3193fd871e1b2e94ba4b09c Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 5 Nov 2014 09:18:52 -0600 Subject: [PATCH 061/101] dt/bindings: Introduce the FSL QorIQ DPAA BMan portal(s) Portals are memory mapped interfaces to BMan that allow low-latency, lock-less interaction by software running on processor cores, accelerators and network interfaces with the BMan Signed-off-by: Emil Medve Change-Id: I6d245ffc14ba3d0e91d403ac7c3b91b75a9e6a95 Signed-off-by: Scott Wood --- .../bindings/soc/fsl/bman-portals.txt | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 Documentation/devicetree/bindings/soc/fsl/bman-portals.txt diff --git a/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt b/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt new file mode 100644 index 000000000000..2a00e14e11e0 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt @@ -0,0 +1,56 @@ +QorIQ DPAA Buffer Manager Portals Device Tree Binding + +Copyright (C) 2008 - 2014 Freescale Semiconductor Inc. + +CONTENTS + + - BMan Portal + - Example + +BMan Portal Node + +Portals are memory mapped interfaces to BMan that allow low-latency, lock-less +interaction by software running on processor cores, accelerators and network +interfaces with the BMan + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,bman-portal-" + May include "fsl,-bman-portal" or "fsl,bman-portal" + +- reg + Usage: Required + Value type: + Definition: Two regions. The first is the cache-enabled region of + the portal. The second is the cache-inhibited region of + the portal + +- interrupts + Usage: Required + Value type: + Definition: Standard property + +EXAMPLE + +The example below shows a (P4080) BMan portals container/bus node with two portals + + bman-portals@ff4000000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + ranges = <0 0xf 0xf4000000 0x200000>; + + bman-portal@0 { + compatible = "fsl,bman-portal-1.0.0", "fsl,bman-portal"; + reg = <0x0 0x4000>, <0x100000 0x1000>; + interrupts = <105 2 0 0>; + }; + bman-portal@4000 { + compatible = "fsl,bman-portal-1.0.0", "fsl,bman-portal"; + reg = <0x4000 0x4000>, <0x101000 0x1000>; + interrupts = <107 2 0 0>; + }; + }; From 76a4f03f3ec7221112c20d053a1233540a601473 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 5 Nov 2014 09:18:53 -0600 Subject: [PATCH 062/101] dt/bindings: Introduce the FSL QorIQ DPAA QMan The Queue Manager is part of the Data-Path Acceleration Architecture (DPAA). QMan supports queuing and QoS scheduling of frames to CPUs, network interfaces and DPAA logic modules, maintains packet ordering within flows. Besides providing flow-level queuing, is also responsible for congestion management functions such as RED/WRED, congestion notifications and tail discards. This binding covers the CCSR space programming model Signed-off-by: Emil Medve Change-Id: I3acb223893e42003d6c9dc061db568ec0b10d29b Signed-off-by: Scott Wood --- .../devicetree/bindings/soc/fsl/qman.txt | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 Documentation/devicetree/bindings/soc/fsl/qman.txt diff --git a/Documentation/devicetree/bindings/soc/fsl/qman.txt b/Documentation/devicetree/bindings/soc/fsl/qman.txt new file mode 100644 index 000000000000..063e3a0b9d04 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/qman.txt @@ -0,0 +1,165 @@ +QorIQ DPAA Queue Manager Device Tree Binding + +Copyright (C) 2008 - 2014 Freescale Semiconductor Inc. + +CONTENTS + + - QMan Node + - QMan Private Memory Nodes + - Example + +QMan Node + +The Queue Manager is part of the Data-Path Acceleration Architecture (DPAA). QMan +supports queuing and QoS scheduling of frames to CPUs, network interfaces and +DPAA logic modules, maintains packet ordering within flows. Besides providing +flow-level queuing, is also responsible for congestion management functions such +as RED/WRED, congestion notifications and tail discards. This binding covers the +CCSR space programming model + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,qman" + May include "fsl,-qman" + +- reg + Usage: Required + Value type: + Definition: Registers region within the CCSR address space + +The QMan revision information is located in the QMAN_IP_REV_1/2 registers which +are located at offsets 0xbf8 and 0xbfc + +- interrupts + Usage: Required + Value type: + Definition: Standard property. The error interrupt + +- fsl,liodn + Usage: See pamu.txt + Value type: + Definition: PAMU property used for static LIODN assignment + +- fsl,iommu-parent + Usage: See pamu.txt + Value type: + Definition: PAMU property used for dynamic LIODN assignment + + For additional details about the PAMU/LIODN binding(s) see pamu.txt + +- clocks + Usage: See clock-bindings.txt and qoriq-clock.txt + Value type: + Definition: Reference input clock. Its frequency is half of the + platform clock + +Devices connected to a QMan instance via Direct Connect Portals (DCP) must link +to the respective QMan instance + +- fsl,qman + Usage: Required + Value type: + Description: List of phandle and DCP index pairs, to the QMan instance + to which this device is connected via the DCP + +QMan Private Memory Nodes + +QMan requires two contiguous range of physical memory used for the backing store +for QMan Frame Queue Descriptor (FQD) and Packed Frame Descriptor Record (PFDR). +This memory is reserved/allocated as a nodes under the /reserved-memory node + +The QMan FQD memory node must be named "qman-fqd" + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must inclide "fsl,qman-fqd" + +The QMan PFDR memory node must be named "qman-pfdr" + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must inclide "fsl,qman-pfdr" + +The following constraints are relevant to the FQD and PFDR private memory: + - The size must be 2^(size + 1), with size = 11..29. That is 4 KiB to + 1 GiB + - The alignment must be a muliptle of the memory size + +The size of the FQD and PFDP must be chosen by observing the hardware features +configured via the Reset Configuration Word (RCW) and that are relevant to a +specific board (e.g. number of MAC(s) pinned-out, number of offline/host command +FMan ports, etc.). The size configured in the DT must reflect the hardware +capabilities and not the specific needs of an application + +For additional details about reserved memory regions see reserved-memory.txt + +EXAMPLE + +The example below shows a QMan FQD and a PFDR dynamic allocation memory nodes + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + qman_fqd: qman-fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0xf 0xffffffff>; + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0xf 0xffffffff>; + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + +The example below shows a (P4080) QMan CCSR-space node + + clockgen: global-utilities@e1000 { + ... + sysclk: sysclk { + ... + }; + ... + platform_pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; + ... + }; + + crypto@300000 { + ... + fsl,qman = <&qman, 2>; + ... + }; + + qman: qman@318000 { + compatible = "fsl,qman"; + reg = <0x318000 0x1000>; + interrupts = <16 2 1 3> + fsl,liodn = <0x16>; + memory-region = <&qman_fqd &qman_pfdr>; + clocks = <&platform_pll 1>; + }; + + fman@400000 { + ... + fsl,qman = <&qman, 0>; + ... + }; From f3f6743d1b719ba53aa69493bf76b76a8871bbfa Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 5 Nov 2014 09:18:54 -0600 Subject: [PATCH 063/101] dt/bindings: Introduce the FSL QorIQ DPAA QMan portal(s) Portals are memory mapped interfaces to QMan that allow low-latency, lock-less interaction by software running on processor cores, accelerators and network interfaces with the QMan Signed-off-by: Emil Medve Change-Id: I29764fa8093b5ce65460abc879446795c50d7185 Signed-off-by: Scott Wood --- .../bindings/soc/fsl/qman-portals.txt | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 Documentation/devicetree/bindings/soc/fsl/qman-portals.txt diff --git a/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt b/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt new file mode 100644 index 000000000000..48c4dae5d6f9 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt @@ -0,0 +1,154 @@ +QorIQ DPAA Queue Manager Portals Device Tree Binding + +Copyright (C) 2008 - 2014 Freescale Semiconductor Inc. + +CONTENTS + + - QMan Portal + - QMan Pool Channel + - Example + +QMan Portal Node + +Portals are memory mapped interfaces to QMan that allow low-latency, lock-less +interaction by software running on processor cores, accelerators and network +interfaces with the QMan + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,qman-portal-" + May include "fsl,-qman-portal" or "fsl,qman-portal" + +- reg + Usage: Required + Value type: + Definition: Two regions. The first is the cache-enabled region of + the portal. The second is the cache-inhibited region of + the portal + +- interrupts + Usage: Required + Value type: + Definition: Standard property + +- fsl,liodn + Usage: See pamu.txt + Value type: + Definition: Two LIODN(s). DQRR LIODN (DLIODN) and Frame LIODN + (FLIODN) + +- fsl,iommu-parent + Usage: See pamu.txt + Value type: + Definition: PAMU property used for dynamic LIODN assignment + + For additional details about the PAMU/LIODN binding(s) see pamu.txt + +- fsl,qman-channel-id + Usage: Required + Value type: + Definition: The hardware index of the channel. This can also be + determined by dividing any of the channel's 8 work queue + IDs by 8 + +In addition to these properties the qman-portals should have sub-nodes to +represent the HW devices/portals that are connected to the software portal +described here + +The currently supported sub-nodes are: + * fman0 + * fman1 + * pme + * crypto + +These subnodes should have the following properties: + +- fsl,liodn + Usage: See pamu.txt + Value type: + Definition: PAMU property used for static LIODN assignment + +- fsl,iommu-parent + Usage: See pamu.txt + Value type: + Definition: PAMU property used for dynamic LIODN assignment + +- dev-handle + Usage: Required + Value type: + Definition: The phandle to the particular hardware device that this + portal is connected to. + +DPAA QMan Pool Channel Nodes + +Pool Channels are defined with the following properties. + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,qman-pool-channel" + May include "fsl,-qman-pool-channel" + +- fsl,qman-channel-id + Usage: Required + Value type: + Definition: The hardware index of the channel. This can also be + determined by dividing any of the channel's 8 work queue + IDs by 8 + +EXAMPLE + +The example below shows a (P4080) QMan portals container/bus node with two portals + + qman-portals@ff4200000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + ranges = <0 0xf 0xf4200000 0x200000>; + + qman-portal@0 { + compatible = "fsl,qman-portal-1.2.0", "fsl,qman-portal"; + reg = <0 0x4000>, <0x100000 0x1000>; + interrupts = <104 2 0 0>; + fsl,liodn = <1 2>; + fsl,qman-channel-id = <0>; + + fman0 { + fsl,liodn = <0x21>; + dev-handle = <&fman0>; + }; + fman1 { + fsl,liodn = <0xa1>; + dev-handle = <&fman1>; + }; + crypto { + fsl,liodn = <0x41 0x66>; + dev-handle = <&crypto>; + }; + }; + qman-portal@4000 { + compatible = "fsl,qman-portal-1.2.0", "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x101000 0x1000>; + interrupts = <106 2 0 0>; + fsl,liodn = <3 4>; + fsl,qman-channel-id = <1>; + + fman0 { + fsl,liodn = <0x22>; + dev-handle = <&fman0>; + }; + fman1 { + fsl,liodn = <0xa2>; + dev-handle = <&fman1>; + }; + crypto { + fsl,liodn = <0x42 0x67>; + dev-handle = <&crypto>; + }; + }; + }; From 297d35fd2a7d3fbd4e5c0f0c1c18213117ba11ba Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Wed, 17 Sep 2014 14:08:30 +0300 Subject: [PATCH 064/101] powerpc/fsl: Frame Manager Device Tree binding document The Frame Manager (FMan) combines the Ethernet network interfaces with packet distribution logic to provide intelligent distribution and queuing decisions for incoming traffic at line rate. This binding document describes Freescale's Frame Manager hardware attributes that are used by the Frame Manager driver for its basic initialization and configuration. Signed-off-by: Igal Liberman Signed-off-by: Scott Wood --- .../devicetree/bindings/powerpc/fsl/fman.txt | 529 ++++++++++++++++++ 1 file changed, 529 insertions(+) create mode 100644 Documentation/devicetree/bindings/powerpc/fsl/fman.txt diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt new file mode 100644 index 000000000000..da8e5f25768f --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt @@ -0,0 +1,529 @@ +============================================================================= +Freescale Frame Manager Device Bindings + +CONTENTS + - FMan Node + - FMan Port Node + - FMan MURAM Node + - FMan dTSEC/XGEC/mEMAC Node + - FMan IEEE 1588 Node + - Example + +============================================================================= +FMan Node + +DESCRIPTION + +Due to the fact that the FMan is an aggregation of sub-engines (ports, MACs, +etc.) the FMan node will have child nodes for each of them. + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must include "fsl,fman" + FMan version can be determined via FM_IP_REV_1 register in the + FMan block. The offset is 0xc4 from the beginning of the + Frame Processing Manager memory map (0xc3000 from the + beginning of the FMan node). + +- cell-index + Usage: required + Value type: + Definition: Specifies the index of the FMan unit. + + The cell-index value may be used by the SoC, to identify the + FMan unit in the SoC memory map. In the table bellow, + there's a description of the cell-index use in each SoC: + + - P1023: + register[bit] FMan unit cell-index + ============================================================ + DEVDISR[1] 1 0 + + - P2041, P3041, P4080 P5020, P5040: + register[bit] FMan unit cell-index + ============================================================ + DCFG_DEVDISR2[6] 1 0 + DCFG_DEVDISR2[14] 2 1 + (Second FM available only in P4080 and P5040) + + - B4860, T1040, T2080, T4240: + register[bit] FMan unit cell-index + ============================================================ + DCFG_CCSR_DEVDISR2[24] 1 0 + DCFG_CCSR_DEVDISR2[25] 2 1 + (Second FM available only in T4240) + + DEVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in + the specific SoC "Device Configuration/Pin Control" Memory + Map. + +- reg + Usage: required + Value type: + Definition: A standard property. Specifies the offset of the + following configuration registers: + - BMI configuration registers. + - QMI configuration registers. + - DMA configuration registers. + - FPM configuration registers. + - FMan controller configuration registers. + +- ranges + Usage: required + Value type: + Definition: A standard property. + +- clocks + Usage: required + Value type: + Definition: phandle for fman clock. + +- clock-names + usage: optional + Value type: + Definition: A standard property + +- interrupts + Usage: required + Value type: + Definition: A pair of IRQs are specified in this property. + The first element is associated with the event interrupts and + the second element is associated with the error interrupts. + +- fsl,qman-channel-range + Usage: required + Value type: + Definition: Specifies the range of the available dedicated + channels in the FMan. The first cell specifies the beginning + of the range and the second cell specifies the number of + channels. + Further information available at: + "Work Queue (WQ) Channel Assignments in the QMan" section + in DPAA Reference Manual. + +============================================================================= +FMan MURAM Node + +DESCRIPTION + +FMan Internal memory - shared between all the FMan modules. +It contains data structures that are common and written to or read by +the modules. +FMan internal memory is split into the following parts: + Packet buffering (Tx/Rx FIFOs) + Frames internal context + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must include "fsl,fman-muram" + +- ranges + Usage: required + Value type: + Definition: A standard property. + Specifies the multi-user memory offset and the size within + the FMan. + +EXAMPLE + +muram@0 { + compatible = "fsl,fman-muram"; + ranges = <0 0x000000 0x28000>; +}; + +============================================================================= +FMan Port Node + +DESCRIPTION + +The Frame Manager (FMan) supports several types of hardware ports: + Ethernet receiver (RX) + Ethernet transmitter (TX) + Offline/Host command (O/H) + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: A standard property. + Must include one of the following: + - "fsl,fman-v2-port-oh" for FManV2 OH ports + - "fsl,fman-v2-port-rx" for FManV2 RX ports + - "fsl,fman-v2-port-tx" for FManV2 TX ports + - "fsl,fman-v3-port-oh" for FManV3 OH ports + - "fsl,fman-v3-port-rx" for FManV3 RX ports + - "fsl,fman-v3-port-tx" for FManV3 TX ports + +- cell-index + Usage: required + Value type: + Definition: Specifies the hardware port id. + Each hardware port on the FMan has its own hardware PortID. + Super set of all hardware Port IDs available at FMan Reference + Manual under "FMan Hardware Ports in Freescale Devices" table. + + Each hardware port is assigned a 4KB, port-specific page in + the FMan hardware port memory region (which is part of the + FMan memory map). The first 4 KB in the FMan hardware ports + memory region is used for what are called common registers. + The subsequent 63 4KB pages are allocated to the hardware + ports. + The page of a specific port is determined by the cell-index. + +- reg + Usage: required + Value type: + Definition: There is one reg region describing the port + configuration registers. + +EXAMPLE + +port@a8000 { + cell-index = <0x28>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xa8000 0x1000>; +}; + +port@88000 { + cell-index = <0x8>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x88000 0x1000>; +}; + +port@81000 { + cell-index = <0x1>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x81000 0x1000>; +}; + +============================================================================= +FMan dTSEC/XGEC/mEMAC Node + +DESCRIPTION + +mEMAC/dTSEC/XGEC are the Ethernet network interfaces + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: A standard property. + Must include one of the following: + - "fsl,fman-dtsec" for dTSEC MAC + - "fsl,fman-xgec" for XGEC MAC + - "fsl,fman-memac for mEMAC MAC + +- cell-index + Usage: required + Value type: + Definition: Specifies the MAC id. + + The cell-index value may be used by the FMan or the SoC, to + identify the MAC unit in the FMan (or SoC) memory map. + In the tables bellow there's a description of the cell-index + use, there are two tables, one describes the use of cell-index + by the FMan, the second describes the use by the SoC: + + 1. FMan Registers + + FManV2: + register[bit] MAC cell-index + ============================================================ + FM_EPI[16] XGEC 8 + FM_EPI[16+n] dTSECn n-1 + FM_NPI[11+n] dTSECn n-1 + n = 1,..,5 + + FManV3: + register[bit] MAC cell-index + ============================================================ + FM_EPI[16+n] mEMACn n-1 + FM_EPI[25] mEMAC10 9 + + FM_NPI[11+n] mEMACn n-1 + FM_NPI[10] mEMAC10 9 + FM_NPI[11] mEMAC9 8 + n = 1,..8 + + FM_EPI and FM_NPI are located in the FMan memory map. + + 2. SoC registers: + + - P2041, P3041, P4080 P5020, P5040: + register[bit] FMan MAC cell + Unit index + ============================================================ + DCFG_DEVDISR2[7] 1 XGEC 8 + DCFG_DEVDISR2[7+n] 1 dTSECn n-1 + DCFG_DEVDISR2[15] 2 XGEC 8 + DCFG_DEVDISR2[15+n] 2 dTSECn n-1 + n = 1,..5 + + - T1040, T2080, T4240, B4860: + register[bit] FMan MAC cell + Unit index + ============================================================ + DCFG_CCSR_DEVDISR2[n-1] 1 mEMACn n-1 + DCFG_CCSR_DEVDISR2[11+n] 2 mEMACn n-1 + n = 1,..6,9,10 + + EVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in + the specific SoC "Device Configuration/Pin Control" Memory + Map. + +- reg + Usage: required + Value type: + Definition: A standard property. + +- fsl,fman-ports + Usage: required + Value type: + Definition: An array of two phandles - the first references is + the FMan RX port and the second is the TX port used by this + MAC. + +- ptp-timer + Usage required + Value type: + Definition: A phandle for 1EEE1588 timer. + +EXAMPLE + +fman1_tx28: port@a8000 { + cell-index = <0x28>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xa8000 0x1000>; +}; + +fman1_rx8: port@88000 { + cell-index = <0x8>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x88000 0x1000>; +}; + +ptp-timer: ptp_timer@fe000 { + compatible = "fsl,fman-ptp-timer"; + reg = <0xfe000 0x1000>; +}; + +ethernet@e0000 { + compatible = "fsl,fman-dtsec"; + cell-index = <0>; + reg = <0xe0000 0x1000>; + fsl,fman-ports = <&fman1_rx8 &fman1_tx28>; + ptp-timer = <&ptp-timer>; +}; + +============================================================================ +FMan IEEE 1588 Node + +DESCRIPTION + +The FMan interface to support IEEE 1588 + + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: A standard property. + Must include "fsl,fman-ptp-timer". + +- reg + Usage: required + Value type: + Definition: A standard property. + +EXAMPLE + +ptp-timer@fe000 { + compatible = "fsl,fman-ptp-timer"; + reg = <0xfe000 0x1000>; +}; + +============================================================================= +Example + +fman@400000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <1>; + compatible = "fsl,fman" + ranges = <0 0x400000 0x100000>; + reg = <0x400000 0x100000>; + clocks = <&fman_clk>; + clock-names = "fmanclk"; + interrupts = < + 96 2 0 0 + 16 2 1 1>; + fsl,qman-channel-range = <0x40 0xc>; + + muram@0 { + compatible = "fsl,fman-muram"; + reg = <0x0 0x28000>; + }; + + port@81000 { + cell-index = <1>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x81000 0x1000>; + }; + + port@82000 { + cell-index = <2>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x82000 0x1000>; + }; + + port@83000 { + cell-index = <3>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x83000 0x1000>; + }; + + port@84000 { + cell-index = <4>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x84000 0x1000>; + }; + + port@85000 { + cell-index = <5>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x85000 0x1000>; + }; + + port@86000 { + cell-index = <6>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x86000 0x1000>; + }; + + fman1_rx_0x8: port@88000 { + cell-index = <0x8>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x88000 0x1000>; + }; + + fman1_rx_0x9: port@89000 { + cell-index = <0x9>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x89000 0x1000>; + }; + + fman1_rx_0xa: port@8a000 { + cell-index = <0xa>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x8a000 0x1000>; + }; + + fman1_rx_0xb: port@8b000 { + cell-index = <0xb>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x8b000 0x1000>; + }; + + fman1_rx_0xc: port@8c000 { + cell-index = <0xc>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x8c000 0x1000>; + }; + + fman1_rx_0x10: port@90000 { + cell-index = <0x10>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x90000 0x1000>; + }; + + fman1_tx_0x28: port@a8000 { + cell-index = <0x28>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xa8000 0x1000>; + }; + + fman1_tx_0x29: port@a9000 { + cell-index = <0x29>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xa9000 0x1000>; + }; + + fman1_tx_0x2a: port@aa000 { + cell-index = <0x2a>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xaa000 0x1000>; + }; + + fman1_tx_0x2b: port@ab000 { + cell-index = <0x2b>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xab000 0x1000>; + }; + + fman1_tx_0x2c: port@ac0000 { + cell-index = <0x2c>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xac000 0x1000>; + }; + + fman1_tx_0x30: port@b0000 { + cell-index = <0x30>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xb0000 0x1000>; + }; + + ethernet@e0000 { + compatible = "fsl,fman-dtsec"; + cell-index = <0>; + reg = <0xe0000 0x1000>; + fsl,fman-ports = <&fman1_rx_0x8 &fman1_tx_0x28>; + }; + + ethernet@e2000 { + compatible = "fsl,fman-dtsec"; + cell-index = <1>; + reg = <0xe2000 0x1000>; + fsl,fman-ports = <&fman1_rx_0x9 &fman1_tx_0x29>; + }; + + ethernet@e4000 { + compatible = "fsl,fman-dtsec"; + cell-index = <2>; + reg = <0xe4000 0x1000>; + fsl,fman-ports = <&fman1_rx_0xa &fman1_tx_0x2a>; + }; + + ethernet@e6000 { + compatible = "fsl,fman-dtsec"; + cell-index = <3>; + reg = <0xe6000 0x1000>; + fsl,fman-ports = <&fman1_rx_0xb &fman1_tx_0x2b>; + }; + + ethernet@e8000 { + compatible = "fsl,fman-dtsec"; + cell-index = <4>; + reg = <0xf0000 0x1000>; + fsl,fman-ports = <&fman1_rx_0xc &fman1_tx_0x2c>; + + ethernet@f0000 { + cell-index = <8>; + compatible = "fsl,fman-xgec"; + reg = <0xf0000 0x1000>; + fsl,fman-ports = <&fman1_rx_0x10 &fman1_tx_0x30>; + }; + + ptp-timer@fe000 { + compatible = "fsl,fman-ptp-timer"; + reg = <0xfe000 0x1000>; + }; +}; From e327fff47b6778e56f8e1dbd6493c1b0bbad1ae8 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 6 Nov 2014 20:56:07 -0600 Subject: [PATCH 065/101] powerpc/fsl: Update fman dt binding with clock name and qbman link The clock name "fmanclk" was given in the example, but not specified in the binding itself. Made clock-names mandatory as otherwise there's not much point having it. Added a reference to the fsl,qman and fsl,bman properties proposed in http://patchwork.ozlabs.org/patch/407034/ and http://patchwork.ozlabs.org/patch/407035/ Signed-off-by: Scott Wood --- .../devicetree/bindings/powerpc/fsl/fman.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt index da8e5f25768f..edeea160ca39 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt @@ -79,12 +79,12 @@ PROPERTIES - clocks Usage: required Value type: - Definition: phandle for fman clock. + Definition: phandle for the fman input clock. - clock-names - usage: optional + usage: required Value type: - Definition: A standard property + Definition: "fmanclk" for the fman input clock. - interrupts Usage: required @@ -104,6 +104,11 @@ PROPERTIES "Work Queue (WQ) Channel Assignments in the QMan" section in DPAA Reference Manual. +- fsl,qman +- fsl,bman + Usage: required + Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt + ============================================================================= FMan MURAM Node From e6a546fd03753ffbd9d2f2883210f8043a8791f7 Mon Sep 17 00:00:00 2001 From: Martijn de Gouw Date: Tue, 5 Aug 2014 15:52:32 +0200 Subject: [PATCH 066/101] powerpc/fsl-rio: add support for mapping inbound windows Add support for mapping and unmapping of inbound rapidio windows. This allows for drivers to open up a part of local memory on the rapidio network. Also applications can use this and tranfer blocks of data over the network. Signed-off-by: Martijn de Gouw [scottwood@freescale.com: updated commit message based on review] Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_rio.c | 104 ++++++++++++++++++++++++++++++++++ arch/powerpc/sysdev/fsl_rio.h | 13 +++++ 2 files changed, 117 insertions(+) diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index c04b718307c8..08d60f183dad 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -58,6 +58,19 @@ #define RIO_ISR_AACR 0x10120 #define RIO_ISR_AACR_AA 0x1 /* Accept All ID */ +#define RIWTAR_TRAD_VAL_SHIFT 12 +#define RIWTAR_TRAD_MASK 0x00FFFFFF +#define RIWBAR_BADD_VAL_SHIFT 12 +#define RIWBAR_BADD_MASK 0x003FFFFF +#define RIWAR_ENABLE 0x80000000 +#define RIWAR_TGINT_LOCAL 0x00F00000 +#define RIWAR_RDTYP_NO_SNOOP 0x00040000 +#define RIWAR_RDTYP_SNOOP 0x00050000 +#define RIWAR_WRTYP_NO_SNOOP 0x00004000 +#define RIWAR_WRTYP_SNOOP 0x00005000 +#define RIWAR_WRTYP_ALLOC 0x00006000 +#define RIWAR_SIZE_MASK 0x0000003F + #define __fsl_read_rio_config(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ @@ -266,6 +279,89 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, return 0; } +static void fsl_rio_inbound_mem_init(struct rio_priv *priv) +{ + int i; + + /* close inbound windows */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) + out_be32(&priv->inb_atmu_regs[i].riwar, 0); +} + +int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, + u64 rstart, u32 size, u32 flags) +{ + struct rio_priv *priv = mport->priv; + u32 base_size; + unsigned int base_size_log; + u64 win_start, win_end; + u32 riwar; + int i; + + if ((size & (size - 1)) != 0) + return -EINVAL; + + base_size_log = ilog2(size); + base_size = 1 << base_size_log; + + /* check if addresses are aligned with the window size */ + if (lstart & (base_size - 1)) + return -EINVAL; + if (rstart & (base_size - 1)) + return -EINVAL; + + /* check for conflicting ranges */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + continue; + win_start = ((u64)(in_be32(&priv->inb_atmu_regs[i].riwbar) & RIWBAR_BADD_MASK)) + << RIWBAR_BADD_VAL_SHIFT; + win_end = win_start + ((1 << ((riwar & RIWAR_SIZE_MASK) + 1)) - 1); + if (rstart < win_end && (rstart + size) > win_start) + return -EINVAL; + } + + /* find unused atmu */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + break; + } + if (i >= RIO_INB_ATMU_COUNT) + return -ENOMEM; + + out_be32(&priv->inb_atmu_regs[i].riwtar, lstart >> RIWTAR_TRAD_VAL_SHIFT); + out_be32(&priv->inb_atmu_regs[i].riwbar, rstart >> RIWBAR_BADD_VAL_SHIFT); + out_be32(&priv->inb_atmu_regs[i].riwar, RIWAR_ENABLE | RIWAR_TGINT_LOCAL | + RIWAR_RDTYP_SNOOP | RIWAR_WRTYP_SNOOP | (base_size_log - 1)); + + return 0; +} + +void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart) +{ + u32 win_start_shift, base_start_shift; + struct rio_priv *priv = mport->priv; + u32 riwar, riwtar; + int i; + + /* skip default window */ + base_start_shift = lstart >> RIWTAR_TRAD_VAL_SHIFT; + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + continue; + + riwtar = in_be32(&priv->inb_atmu_regs[i].riwtar); + win_start_shift = riwtar & RIWTAR_TRAD_MASK; + if (win_start_shift == base_start_shift) { + out_be32(&priv->inb_atmu_regs[i].riwar, riwar & ~RIWAR_ENABLE); + return; + } + } +} + void fsl_rio_port_error_handler(int offset) { /*XXX: Error recovery is not implemented, we just clear errors */ @@ -389,6 +485,8 @@ int fsl_rio_setup(struct platform_device *dev) ops->add_outb_message = fsl_add_outb_message; ops->add_inb_buffer = fsl_add_inb_buffer; ops->get_inb_message = fsl_get_inb_message; + ops->map_inb = fsl_map_inb_mem; + ops->unmap_inb = fsl_unmap_inb_mem; rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0); if (!rmu_node) { @@ -602,6 +700,11 @@ int fsl_rio_setup(struct platform_device *dev) RIO_ATMU_REGS_PORT2_OFFSET)); priv->maint_atmu_regs = priv->atmu_regs + 1; + priv->inb_atmu_regs = (struct rio_inb_atmu_regs __iomem *) + (priv->regs_win + + ((i == 0) ? RIO_INB_ATMU_REGS_PORT1_OFFSET : + RIO_INB_ATMU_REGS_PORT2_OFFSET)); + /* Set to receive any dist ID for serial RapidIO controller. */ if (port->phy_type == RIO_PHY_SERIAL) @@ -620,6 +723,7 @@ int fsl_rio_setup(struct platform_device *dev) rio_law_start = range_start; fsl_rio_setup_rmu(port, rmu_np[i]); + fsl_rio_inbound_mem_init(priv); dbell->mport[i] = port; diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h index ae8e27405a0d..d53407a34f32 100644 --- a/arch/powerpc/sysdev/fsl_rio.h +++ b/arch/powerpc/sysdev/fsl_rio.h @@ -50,9 +50,12 @@ #define RIO_S_DBELL_REGS_OFFSET 0x13400 #define RIO_S_PW_REGS_OFFSET 0x134e0 #define RIO_ATMU_REGS_DBELL_OFFSET 0x10C40 +#define RIO_INB_ATMU_REGS_PORT1_OFFSET 0x10d60 +#define RIO_INB_ATMU_REGS_PORT2_OFFSET 0x10f60 #define MAX_MSG_UNIT_NUM 2 #define MAX_PORT_NUM 4 +#define RIO_INB_ATMU_COUNT 4 struct rio_atmu_regs { u32 rowtar; @@ -63,6 +66,15 @@ struct rio_atmu_regs { u32 pad2[3]; }; +struct rio_inb_atmu_regs { + u32 riwtar; + u32 pad1; + u32 riwbar; + u32 pad2; + u32 riwar; + u32 pad3[3]; +}; + struct rio_dbell_ring { void *virt; dma_addr_t phys; @@ -99,6 +111,7 @@ struct rio_priv { void __iomem *regs_win; struct rio_atmu_regs __iomem *atmu_regs; struct rio_atmu_regs __iomem *maint_atmu_regs; + struct rio_inb_atmu_regs __iomem *inb_atmu_regs; void __iomem *maint_win; void *rmm_handle; /* RapidIO message manager(unit) Handle */ }; From 9e819963b45f79e87f5a8c44960a66c0727c80e6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 2 Nov 2014 20:02:42 +0530 Subject: [PATCH 067/101] powerpc: Disable CPU_FTR_TM if TM is disabled by firmware Firmware is allowed to communicate to us via the "ibm,pa-features" property that TM (Transactional Memory) support is disabled. Currently this doesn't happen on any platform we're aware of, but we should honor it anyway. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6af05fc1dec9..6a799b3cc6b4 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -160,6 +160,12 @@ static struct ibm_pa_feature { {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + /* + * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), + * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP + * which is 0 if the kernel doesn't support TM. + */ + {CPU_FTR_TM_COMP, 0, 0, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, From 06743521d0eae1263a09bccb1a92a9fbb94660b3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Nov 2014 21:57:39 +0530 Subject: [PATCH 068/101] powerpc/mm: Add missing pmd accessors This patch add documentation and missing accessors. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable-ppc64-4k.h | 16 +++++- arch/powerpc/include/asm/pgtable-ppc64-64k.h | 3 ++ arch/powerpc/include/asm/pgtable-ppc64.h | 51 +++++++++++++++----- arch/powerpc/mm/hugetlbpage.c | 3 ++ arch/powerpc/mm/pgtable_64.c | 22 +++++++-- 5 files changed, 78 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 7b935683f268..132ee1d482c2 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -57,7 +57,21 @@ #define pgd_present(pgd) (pgd_val(pgd) != 0) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) #define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd)) + +#ifndef __ASSEMBLY__ + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} +extern struct page *pgd_page(pgd_t pgd); + +#endif /* !__ASSEMBLY__ */ #define pud_offset(pgdp, addr) \ (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h index a56b82fb0609..1de35bbd02a6 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h @@ -38,4 +38,7 @@ /* Bits to mask out from a PGD/PUD to get to the PMD page */ #define PUD_MASKED_BITS 0x1ff +#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) +#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) + #endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index ae153c40ab7c..d12092420560 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -152,7 +152,7 @@ #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ || (pmd_val(pmd) & PMD_BAD_BITS)) -#define pmd_present(pmd) (pmd_val(pmd) != 0) +#define pmd_present(pmd) (!pmd_none(pmd)) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) extern struct page *pmd_page(pmd_t pmd); @@ -164,9 +164,21 @@ extern struct page *pmd_page(pmd_t pmd); #define pud_present(pud) (pud_val(pud) != 0) #define pud_clear(pudp) (pud_val(*(pudp)) = 0) #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) -#define pud_page(pud) virt_to_page(pud_page_vaddr(pud)) +extern struct page *pud_page(pud_t pud); + +static inline pte_t pud_pte(pud_t pud) +{ + return __pte(pud_val(pud)); +} + +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} +#define pud_write(pud) pte_write(pud_pte(pud)) #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) +#define pgd_write(pgd) pte_write(pgd_pte(pgd)) /* * Find an entry in a page-table-directory. We combine the address region @@ -422,7 +434,22 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); - +/* + * + * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs + * page. The hugetlbfs page table walking and mangling paths are totally + * separated form the core VM paths and they're differentiated by + * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. + * + * pmd_trans_huge() is defined as false at build time if + * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build + * time in such case. + * + * For ppc64 we need to differntiate from explicit hugepages from THP, because + * for THP we also track the subpage details at the pmd level. We don't do + * that for explicit huge pages. + * + */ static inline int pmd_trans_huge(pmd_t pmd) { /* @@ -431,16 +458,6 @@ static inline int pmd_trans_huge(pmd_t pmd) return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); } -static inline int pmd_large(pmd_t pmd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - if (pmd_trans_huge(pmd)) - return pmd_val(pmd) & _PAGE_PRESENT; - return 0; -} - static inline int pmd_trans_splitting(pmd_t pmd) { if (pmd_trans_huge(pmd)) @@ -451,6 +468,14 @@ static inline int pmd_trans_splitting(pmd_t pmd) extern int has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline int pmd_large(pmd_t pmd) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pmd_val(pmd) & 0x3) != 0x0); +} + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b460e723f0ec..2b8e5ed28831 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -62,6 +62,9 @@ static unsigned nr_gpages; /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + * + * Defined in such a way that we can optimize away code block at build time + * if CONFIG_HUGETLB_PAGE=n. */ int pmd_huge(pmd_t pmd) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e0c718543174..87ff0c1908a9 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -344,16 +345,31 @@ EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); +#ifndef __PAGETABLE_PUD_FOLDED +/* 4 level page table */ +struct page *pgd_page(pgd_t pgd) +{ + if (pgd_huge(pgd)) + return pte_page(pgd_pte(pgd)); + return virt_to_page(pgd_page_vaddr(pgd)); +} +#endif + +struct page *pud_page(pud_t pud) +{ + if (pud_huge(pud)) + return pte_page(pud_pte(pud)); + return virt_to_page(pud_page_vaddr(pud)); +} + /* * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. */ struct page *pmd_page(pmd_t pmd) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(pmd)) + if (pmd_trans_huge(pmd) || pmd_huge(pmd)) return pfn_to_page(pmd_pfn(pmd)); -#endif return virt_to_page(pmd_page_vaddr(pmd)); } From f30c59e921f12b209852e1ddc197dc6a8fb0142b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Nov 2014 21:57:40 +0530 Subject: [PATCH 069/101] mm: Update generic gup implementation to handle hugepage directory Update generic gup implementation with powerpc specific details. On powerpc at pmd level we can have hugepte, normal pmd pointer or a pointer to the hugepage directory. Tested-by: Steve Capper Acked-by: Steve Capper Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/page.h | 1 + include/linux/hugetlb.h | 46 +++++++++++++++++++ mm/gup.c | 81 +++++++++++++++++++++++++++++---- 3 files changed, 120 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 26fe1ae15212..f973fce73a43 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -380,6 +380,7 @@ static inline int hugepd_ok(hugepd_t hpd) #endif #define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6e6d338641fe..e6b62f30ab21 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -175,6 +175,52 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, } #endif /* !CONFIG_HUGETLB_PAGE */ +/* + * hugepages at page global directory. If arch support + * hugepages at pgd level, they need to define this. + */ +#ifndef pgd_huge +#define pgd_huge(x) 0 +#endif + +#ifndef pgd_write +static inline int pgd_write(pgd_t pgd) +{ + BUG(); + return 0; +} +#endif + +#ifndef pud_write +static inline int pud_write(pud_t pud) +{ + BUG(); + return 0; +} +#endif + +#ifndef is_hugepd +/* + * Some architectures requires a hugepage directory format that is + * required to support multiple hugepage sizes. For example + * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables" + * introduced the same on powerpc. This allows for a more flexible hugepage + * pagetable layout. + */ +typedef struct { unsigned long pd; } hugepd_t; +#define is_hugepd(hugepd) (0) +#define __hugepd(x) ((hugepd_t) { (x) }) +static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr) +{ + return 0; +} +#else +extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr); +#endif #define HUGETLB_ANON_FILE "anon_hugepage" diff --git a/mm/gup.c b/mm/gup.c index cd62c8c90d4a..0ca1df9075ab 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -12,6 +11,7 @@ #include #include +#include #include #include "internal.h" @@ -875,6 +875,49 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 1; } +static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, + unsigned long end, int write, + struct page **pages, int *nr) +{ + int refs; + struct page *head, *page, *tail; + + if (write && !pgd_write(orig)) + return 0; + + refs = 0; + head = pgd_page(orig); + page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -902,6 +945,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pages, nr)) return 0; + } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) { + /* + * architecture have different format for hugetlbfs + * pmd format and THP pmd format + */ + if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, + PMD_SHIFT, next, write, pages, nr)) + return 0; } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); @@ -909,22 +960,26 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(pgdp, addr); + pudp = pud_offset(&pgd, addr); do { pud_t pud = ACCESS_ONCE(*pudp); next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (pud_huge(pud)) { + if (unlikely(pud_huge(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, write, - pages, nr)) + pages, nr)) + return 0; + } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { + if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, + PUD_SHIFT, next, write, pages, nr)) return 0; } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) return 0; @@ -970,10 +1025,20 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, local_irq_save(flags); pgdp = pgd_offset(mm, addr); do { + pgd_t pgd = ACCESS_ONCE(*pgdp); + next = pgd_addr_end(addr, end); - if (pgd_none(*pgdp)) + if (pgd_none(pgd)) break; - else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr)) + if (unlikely(pgd_huge(pgd))) { + if (!gup_huge_pgd(pgd, pgdp, addr, next, write, + pages, &nr)) + break; + } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { + if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, + PGDIR_SHIFT, next, write, pages, &nr)) + break; + } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); From b30e759072c182538abb6908681cfd49978ba5e2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Nov 2014 21:57:41 +0530 Subject: [PATCH 070/101] powerpc/mm: Switch to generic RCU get_user_pages_fast This patch switch the ppc arch to use the generic RCU based gup implementation. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/hugetlb.h | 8 +- arch/powerpc/include/asm/page.h | 3 +- arch/powerpc/include/asm/pgtable-ppc64.h | 1 - arch/powerpc/include/asm/pgtable.h | 6 +- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/gup.c | 235 ----------------------- arch/powerpc/mm/hugetlbpage.c | 33 ++-- 8 files changed, 22 insertions(+), 267 deletions(-) delete mode 100644 arch/powerpc/mm/gup.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 98e9c548bd75..46227336aacb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -149,6 +149,7 @@ config PPC select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM + select HAVE_GENERIC_RCU_GUP config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 766b77d527ac..1d53a65b4ec1 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd) #endif /* CONFIG_PPC_BOOK3S_64 */ -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { /* @@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, */ unsigned long idx = 0; - pte_t *dir = hugepd_page(*hpdp); + pte_t *dir = hugepd_page(hpd); #ifndef CONFIG_PPC_FSL_BOOK3E - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); #endif return dir + idx; @@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, } #define hugepd_shift(x) 0 -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { return 0; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f973fce73a43..69c059887a2c 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd) } #endif -#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define is_hugepd(hpd) (hugepd_ok(hpd)) #define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 #define pgd_huge(pgd) 0 #endif /* CONFIG_HUGETLB_PAGE */ +#define __hugepd(x) ((hugepd_t) { (x) }) struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index d12092420560..5600e434332f 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 316f9a5da173..a8805fee0df9 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -274,11 +274,9 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); - extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); + unsigned long end, int write, + struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #define has_transparent_hugepage() 0 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 325e861616a1..438dcd3fd0d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c deleted file mode 100644 index d8746684f606..000000000000 --- a/arch/powerpc/mm/gup.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Lockless get_user_pages_fast for powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -#ifdef __HAVE_ARCH_PTE_SPECIAL - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - result = _PAGE_PRESENT|_PAGE_USER; - if (write) - result |= _PAGE_RW; - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - pte_t pte = ACCESS_ONCE(*ptep); - struct page *page; - /* - * Similar to the PMD case, NUMA hinting must take slow path - */ - if (pte_numa(pte)) - return 0; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - if (!page_cache_get_speculative(page)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(page); - return 0; - } - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = ACCESS_ONCE(*pmdp); - - next = pmd_addr_end(addr, end); - /* - * If we find a splitting transparent hugepage we - * return zero. That will result in taking the slow - * path which will call wait_split_huge_page() - * if the pmd is still in splitting state - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (pmd_huge(pmd) || pmd_large(pmd)) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_numa(pmd)) - return 0; - - if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pmdp)) { - if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = ACCESS_ONCE(*pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (pud_huge(pud)) { - if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pudp)) { - if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) - return 0; - - pr_devel(" aligned: %lx .. %lx\n", start, end); - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on powerpc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = ACCESS_ONCE(*pgdp); - - pr_devel(" %016lx: normal pgd %p\n", addr, - (void *)pgd_val(pgd)); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (pgd_huge(pgd)) { - if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, - write, pages, &nr)) - break; - } else if (is_hugepd(pgdp)) { - if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, - addr, next, write, pages, &nr)) - break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - int nr, ret; - - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; - - if (nr < nr_pages) { - pr_devel(" slow path ! nr = %d\n", nr); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - } - - return ret; -} - -#endif /* __HAVE_ARCH_PTE_SPECIAL */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 2b8e5ed28831..af56de82375d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #else @@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #endif @@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (!is_hugepd(pmd)) { + if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { /* * if it is not hugepd pointer, we should already find * it cleared. @@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, do { pud = pud_offset(pgd, addr); next = pud_addr_end(addr, end); - if (!is_hugepd(pud)) { + if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) continue; hugetlb_free_pmd_range(tlb, pud, addr, next, floor, @@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); - if (!is_hugepd(pgd)) { + if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); @@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? __boundary : end; } -int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, - unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, + unsigned long end, int write, struct page **pages, int *nr) { pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(*hugepd); + unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); @@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pgd_huge(pgd)) { ret_pte = (pte_t *) pgdp; goto out; - } else if (is_hugepd(&pgd)) + } else if (is_hugepd(__hugepd(pgd_val(pgd)))) hpdp = (hugepd_t *)&pgd; else { /* @@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pud_huge(pud)) { ret_pte = (pte_t *) pudp; goto out; - } else if (is_hugepd(&pud)) + } else if (is_hugepd(__hugepd(pud_val(pud)))) hpdp = (hugepd_t *)&pud; else { pdshift = PMD_SHIFT; @@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (pmd_huge(pmd) || pmd_large(pmd)) { ret_pte = (pte_t *) pmdp; goto out; - } else if (is_hugepd(&pmd)) + } else if (is_hugepd(__hugepd(pmd_val(pmd)))) hpdp = (hugepd_t *)&pmd; else return pte_offset_kernel(&pmd, ea); @@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (!hpdp) return NULL; - ret_pte = hugepte_offset(hpdp, ea, pdshift); + ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: if (shift) @@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if ((pte_val(pte) & mask) != mask) return 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* - * check for splitting here - */ - if (pmd_trans_splitting(pte_pmd(pte))) - return 0; -#endif - /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); From 1665c4a892983ae5c0efa6ef88c00266e3537717 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:04 +1100 Subject: [PATCH 071/101] powerpc/powernv: Check PHB type in advance The patch checks PHB type a bit early to save a bit cycles for P7 because we don't support M64 for P7IOC no matter what OPAL firmware we have. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 468a0f23c7f2..7ab1dd732993 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -313,6 +313,12 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) const u32 *r; u64 pci_addr; + /* FIXME: Support M64 for P7IOC */ + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { pr_info(" Firmware too old to support M64 window\n"); return; @@ -325,12 +331,6 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) return; } - /* FIXME: Support M64 for P7IOC */ - if (phb->type != PNV_PHB_IODA2) { - pr_info(" Not support M64 window\n"); - return; - } - res = &hose->mem_resources[1]; res->start = of_translate_address(dn, r + 2); res->end = res->start + of_read_number(r + 4, 2) - 1; From 9e9e8935215d164cea2f49a7679cc0663c2253b6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:05 +1100 Subject: [PATCH 072/101] powerpc/powernv: Fix condition to remove M64 The M64 resource should be removed if we don't have hook to initialize it, or (not and) fail to do that. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7ab1dd732993..7aa040441c08 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2000,8 +2000,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); } - /* Configure M64 window */ - if (phb->init_m64 && phb->init_m64(phb)) + /* Remove M64 resource if we can't configure it successfully */ + if (!phb->init_m64 || phb->init_m64(phb)) hose->mem_resources[1].flags = 0; } From 5ef73567813cddabe5fd1105e915be0851820f4f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:06 +1100 Subject: [PATCH 073/101] powerpc/powernv: Rename alloc_m64_pe() to reserve_m64_pe() The patch renames alloc_m64_pe() to reserve_m64_pe() to reflect its real usage: We reserve PE numbers for M64 segments in advance and then pick up the reserved PE numbers when building the mapping between PE numbers and M64 segments. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 8 ++++---- arch/powerpc/platforms/powernv/pci.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7aa040441c08..cf90cce3dc9f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -172,7 +172,7 @@ fail: return -EIO; } -static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) +static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) { resource_size_t sgsz = phb->ioda.m64_segsize; struct pci_dev *pdev; @@ -345,7 +345,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) /* Use last M64 BAR to cover M64 window */ phb->ioda.m64_bar_idx = 15; phb->init_m64 = pnv_ioda2_init_m64; - phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe; + phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe; phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; } @@ -837,8 +837,8 @@ static void pnv_pci_ioda_setup_PEs(void) phb = hose->private_data; /* M64 layout might affect PE allocation */ - if (phb->alloc_m64_pe) - phb->alloc_m64_pe(phb); + if (phb->reserve_m64_pe) + phb->reserve_m64_pe(phb); pnv_ioda_setup_PEs(hose->bus); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 34d29eb2a4de..6c02ff8dd69f 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -130,7 +130,7 @@ struct pnv_phb { u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); int (*init_m64)(struct pnv_phb *phb); - void (*alloc_m64_pe)(struct pnv_phb *phb); + void (*reserve_m64_pe)(struct pnv_phb *phb); int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); From 4b82ab18033a7e8434e568a6485e6e5c2e5adb02 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:07 +1100 Subject: [PATCH 074/101] powerpc/powernv: Initialize M64 PE in time The patch initializes PE instance when reserving PE number to keep consistent things as we did before. Also, it replaces the iteration on bridge's windows with the prefered way. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 27 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cf90cce3dc9f..7a7a68873df4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -91,6 +91,24 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) +{ + if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) { + pr_warn("%s: Invalid PE %d on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) { + pr_warn("%s: PE %d was assigned on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + phb->ioda.pe_array[pe_no].phb = phb; + phb->ioda.pe_array[pe_no].pe_number = pe_no; +} + static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -185,16 +203,15 @@ static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) * instead of root bus. */ list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { - for (i = PCI_BRIDGE_RESOURCES; - i <= PCI_BRIDGE_RESOURCE_END; i++) { - r = &pdev->resource[i]; + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { + r = &pdev->resource[PCI_BRIDGE_RESOURCES + i]; if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) continue; base = (r->start - phb->ioda.m64_base) / sgsz; for (step = 0; step < resource_size(r) / sgsz; step++) - set_bit(base + step, phb->ioda.pe_alloc); + pnv_ioda_reserve_pe(phb, base + step); } } } @@ -287,8 +304,6 @@ done: while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < phb->ioda.total_pe) { pe = &phb->ioda.pe_array[i]; - pe->phb = phb; - pe->pe_number = i; if (!master_pe) { pe->flags |= PNV_IODA_PE_MASTER; From b131a8425c34b41ab280edc211fb026451d43264 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:08 +1100 Subject: [PATCH 075/101] powerpc/powernv: Set PELTV for compound PEs Commit 262af55 ("powerpc/powernv: Enable M64 aperatus for PHB3") introduced compound PEs in order to support M64 aperatus on PHB3. However, we never configured PELTV for compound PEs. The patch fixes that by: parent PE can freeze all child compound PEs. Any compound PE affects the group. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 118 +++++++++++++++++++--- 1 file changed, 102 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7a7a68873df4..b96ba48462f7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -522,6 +522,106 @@ static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) } #endif /* CONFIG_PCI_MSI */ +static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *parent, + struct pnv_ioda_pe *child, + bool is_add) +{ + const char *desc = is_add ? "adding" : "removing"; + uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN : + OPAL_REMOVE_PE_FROM_DOMAIN; + struct pnv_ioda_pe *slave; + long rc; + + /* Parent PE affects child PE */ + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + child->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(child, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + + if (!(child->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Compound case: parent PE affects slave PEs */ + list_for_each_entry(slave, &child->slaves, list) { + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + slave->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(slave, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + } + + return 0; +} + +static int pnv_ioda_set_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + bool is_add) +{ + struct pnv_ioda_pe *slave; + struct pci_dev *pdev; + int ret; + + /* + * Clear PE frozen state. If it's master PE, we need + * clear slave PE frozen state as well. + */ + if (is_add) { + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) + opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + } + } + + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ + ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add); + if (ret) + return ret; + + /* For compound PEs, any one affects all of them */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) { + ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add); + if (ret) + return ret; + } + } + + if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) + pdev = pe->pbus->self; + else + pdev = pe->pdev->bus->self; + while (pdev) { + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *parent; + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + parent = &phb->ioda.pe_array[pdn->pe_number]; + ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add); + if (ret) + return ret; + } + + pdev = pdev->bus->self; + } + + return 0; +} + static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -576,23 +676,9 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return -ENXIO; } - rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, - pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); - if (rc) - pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); - opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + /* Configure PELTV */ + pnv_ioda_set_peltv(phb, pe, true); - /* Add to all parents PELT-V */ - while (parent) { - struct pci_dn *pdn = pci_get_pdn(parent); - if (pdn && pdn->pe_number != IODA_INVALID_PE) { - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, - pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); - /* XXX What to do in case of error ? */ - } - parent = parent->bus->self; - } /* Setup reverse map */ for (rid = pe->rid; rid < rid_end; rid++) phb->ioda.pe_rmap[rid] = pe->pe_number; From 4773f76b61e5cbf41ad514978d9338d8a7ed49e1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:09 +1100 Subject: [PATCH 076/101] powerpc/powernv: Simplify pnv_ioda_configure_pe() Nested if statements are always bad and the patch avoids one by checking PHB type and bail in advance if necessary. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 32 ++++++++++++----------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b96ba48462f7..762ca14228ea 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -684,26 +684,28 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) phb->ioda.pe_rmap[rid] = pe->pe_number; /* Setup one MVTs on IODA1 */ - if (phb->type == PNV_PHB_IODA1) { - pe->mve_number = pe->pe_number; - rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, - pe->pe_number); + if (phb->type != PNV_PHB_IODA1) { + pe->mve_number = 0; + goto out; + } + + pe->mve_number = pe->pe_number; + rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number); + if (rc != OPAL_SUCCESS) { + pe_err(pe, "OPAL error %ld setting up MVE %d\n", + rc, pe->mve_number); + pe->mve_number = -1; + } else { + rc = opal_pci_set_mve_enable(phb->opal_id, + pe->mve_number, OPAL_ENABLE_MVE); if (rc) { - pe_err(pe, "OPAL error %ld setting up MVE %d\n", + pe_err(pe, "OPAL error %ld enabling MVE %d\n", rc, pe->mve_number); pe->mve_number = -1; - } else { - rc = opal_pci_set_mve_enable(phb->opal_id, - pe->mve_number, OPAL_ENABLE_MVE); - if (rc) { - pe_err(pe, "OPAL error %ld enabling MVE %d\n", - rc, pe->mve_number); - pe->mve_number = -1; - } } - } else if (phb->type == PNV_PHB_IODA2) - pe->mve_number = 0; + } +out: return 0; } From ec8e4e9d3d83b7a973477c2cb4831511b0eec06e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:10 +1100 Subject: [PATCH 077/101] powerpc/powernv: Bail upon invalid master PE When freezing compound PEs in pnv_ioda_freeze_pe(), we should bail upon illegal master PE. We needn't freeze slave PE because it should have been put into frozen state by hardware. Reported-by: Anton Blanchard Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 762ca14228ea..d03503515692 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -373,7 +373,9 @@ static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) /* Fetch master PE */ if (pe->flags & PNV_IODA_PE_SLAVE) { pe = pe->master; - WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER))) + return; + pe_no = pe->pe_number; } From cf2b1e0eb0c281122b001cc879ca0118bff71255 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:11 +1100 Subject: [PATCH 078/101] powerpc/powernv: Fix potential zero devisor If there're no PHBs under P5IOC2 HUB device tree node, we should bail early to avoid zero devisor and allocating TCE tables. Reported-by: Anton Blanchard Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 28 ++++++++++++--------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 94ce3481490b..3336fcbdd08a 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -196,6 +196,22 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) hub_id = be64_to_cpup(prop64); pr_info(" HUB-ID : 0x%016llx\n", hub_id); + /* Count child PHBs and calculate TCE space per PHB */ + for_each_child_of_node(np, phbn) { + if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || + of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) + phb_count++; + } + + if (phb_count <= 0) { + pr_info(" No PHBs for Hub %s\n", np->full_name); + return; + } + + tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); + pr_info(" Allocating %lld MB of TCE memory per PHB\n", + tce_per_phb >> 20); + /* Currently allocate 16M of TCE memory for every Hub * * XXX TODO: Make it chip local if possible @@ -215,18 +231,6 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) return; } - /* Count child PHBs */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) - phb_count++; - } - - /* Calculate how much TCE space we can give per PHB */ - tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); - pr_info(" Allocating %lld MB of TCE memory per PHB\n", - tce_per_phb >> 20); - /* Initialize PHBs */ for_each_child_of_node(np, phbn) { if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || From 59994fb01a102a448ba758c9b824a29b4a99cc1b Mon Sep 17 00:00:00 2001 From: Vineeth Vijayan Date: Fri, 14 Nov 2014 14:42:05 +0530 Subject: [PATCH 079/101] powerpc: Use generic PIE randomization Back in 2009 we merged 501cb16d3cfd "Randomise PIEs", which added support for randomizing PIE (Position Independent Executable) binaries. That commit added randomize_et_dyn(), which correctly randomized the addresses, but failed to honor PF_RANDOMIZE. That means it was not possible to disable PIE randomization via the personality flag, or /proc/sys/kernel/randomize_va_space. Since then there has been generic support for PIE randomization added to binfmt_elf.c, selectable via ARCH_BINFMT_ELF_RANDOMIZE_PIE. Enabling that allows us to drop randomize_et_dyn(), which means we start honoring PF_RANDOMIZE correctly. It also causes a fairly major change to how we layout PIE binaries. Currently we will place the binary at 512MB-520MB for 32 bit binaries, or 512MB-1.5GB for 64 bit binaries, eg: $ cat /proc/$$/maps 4e550000-4e580000 r-xp 00000000 08:02 129813 /bin/dash 4e580000-4e590000 rw-p 00020000 08:02 129813 /bin/dash 10014110000-10014140000 rw-p 00000000 00:00 0 [heap] 3fffaa3f0000-3fffaa5a0000 r-xp 00000000 08:02 921 /lib/powerpc64le-linux-gnu/libc-2.19.so 3fffaa5a0000-3fffaa5b0000 rw-p 001a0000 08:02 921 /lib/powerpc64le-linux-gnu/libc-2.19.so 3fffaa5c0000-3fffaa5d0000 rw-p 00000000 00:00 0 3fffaa5d0000-3fffaa5f0000 r-xp 00000000 00:00 0 [vdso] 3fffaa5f0000-3fffaa620000 r-xp 00000000 08:02 1246 /lib/powerpc64le-linux-gnu/ld-2.19.so 3fffaa620000-3fffaa630000 rw-p 00020000 08:02 1246 /lib/powerpc64le-linux-gnu/ld-2.19.so 3ffffc340000-3ffffc370000 rw-p 00000000 00:00 0 [stack] With this commit applied we don't do any special randomisation for the binary, and instead rely on mmap randomisation. This means the binary ends up at high addresses, eg: $ cat /proc/$$/maps 3fff99820000-3fff999d0000 r-xp 00000000 08:02 921 /lib/powerpc64le-linux-gnu/libc-2.19.so 3fff999d0000-3fff999e0000 rw-p 001a0000 08:02 921 /lib/powerpc64le-linux-gnu/libc-2.19.so 3fff999f0000-3fff99a00000 rw-p 00000000 00:00 0 3fff99a00000-3fff99a20000 r-xp 00000000 00:00 0 [vdso] 3fff99a20000-3fff99a50000 r-xp 00000000 08:02 1246 /lib/powerpc64le-linux-gnu/ld-2.19.so 3fff99a50000-3fff99a60000 rw-p 00020000 08:02 1246 /lib/powerpc64le-linux-gnu/ld-2.19.so 3fff99a60000-3fff99a90000 r-xp 00000000 08:02 129813 /bin/dash 3fff99a90000-3fff99aa0000 rw-p 00020000 08:02 129813 /bin/dash 3fffc3de0000-3fffc3e10000 rw-p 00000000 00:00 0 [stack] 3fffc55e0000-3fffc5610000 rw-p 00000000 00:00 0 [heap] Although this should be OK, it's possible it might break badly written binaries that make assumptions about the address space layout. Signed-off-by: Vineeth Vijayan [mpe: Rewrite changelog] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/elf.h | 3 +-- arch/powerpc/kernel/process.c | 9 --------- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 46227336aacb..421df5cc6d18 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -88,6 +88,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF + select ARCH_BINFMT_ELF_RANDOMIZE_PIE select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 888d8f3f2524..57d289acb803 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -28,8 +28,7 @@ the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -extern unsigned long randomize_et_dyn(unsigned long base); -#define ELF_ET_DYN_BASE (randomize_et_dyn(0x20000000)) +#define ELF_ET_DYN_BASE 0x20000000 #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f6b82152e7aa..b4cc7bef6b16 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1654,12 +1654,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) return ret; } -unsigned long randomize_et_dyn(unsigned long base) -{ - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < base) - return base; - - return ret; -} From 16b1d26e77b142546e2b9b6dc3b5aa5c44ae3b77 Mon Sep 17 00:00:00 2001 From: Neelesh Gupta Date: Tue, 14 Oct 2014 14:08:36 +0530 Subject: [PATCH 080/101] rtc/tpo: Driver to support rtc and wakeup on PowerNV platform The patch implements the OPAL rtc driver that binds with the rtc driver subsystem. The driver uses the platform device infrastructure to probe the rtc device and register it to rtc class framework. The 'wakeup' is supported depending upon the property 'has-tpo' present in the OF node. It provides a way to load the generic rtc driver in in the absence of an OPAL driver. The patch also moves the existing OPAL rtc get/set time interfaces to the new driver and exposes the necessary OPAL calls using EXPORT_SYMBOL_GPL. Test results: ------------- Host: [root@tul169p1 ~]# ls -l /sys/class/rtc/ total 0 lrwxrwxrwx 1 root root 0 Oct 14 03:07 rtc0 -> ../../devices/opal-rtc/rtc/rtc0 [root@tul169p1 ~]# cat /sys/devices/opal-rtc/rtc/rtc0/time 08:10:07 [root@tul169p1 ~]# echo `date '+%s' -d '+ 2 minutes'` > /sys/class/rtc/rtc0/wakealarm [root@tul169p1 ~]# cat /sys/class/rtc/rtc0/wakealarm 1413274345 [root@tul169p1 ~]# FSP: $ smgr mfgState standby $ rtim timeofday System time is valid: 2014/10/14 08:12:04.225115 $ smgr mfgState ipling $ CC: devicetree@vger.kernel.org CC: tglx@linutronix.de CC: rtc-linux@googlegroups.com CC: a.zummo@towertech.it Signed-off-by: Neelesh Gupta Signed-off-by: Michael Ellerman --- .../devicetree/bindings/rtc/rtc-opal.txt | 16 ++ arch/powerpc/include/asm/opal.h | 7 +- arch/powerpc/kernel/time.c | 1 + arch/powerpc/platforms/powernv/opal-async.c | 3 + arch/powerpc/platforms/powernv/opal-rtc.c | 67 ++--- .../powerpc/platforms/powernv/opal-wrappers.S | 2 + arch/powerpc/platforms/powernv/opal.c | 6 + arch/powerpc/platforms/powernv/setup.c | 2 - drivers/rtc/Kconfig | 11 + drivers/rtc/Makefile | 1 + drivers/rtc/rtc-opal.c | 261 ++++++++++++++++++ 11 files changed, 326 insertions(+), 51 deletions(-) create mode 100644 Documentation/devicetree/bindings/rtc/rtc-opal.txt create mode 100644 drivers/rtc/rtc-opal.c diff --git a/Documentation/devicetree/bindings/rtc/rtc-opal.txt b/Documentation/devicetree/bindings/rtc/rtc-opal.txt new file mode 100644 index 000000000000..af87e5ecac54 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/rtc-opal.txt @@ -0,0 +1,16 @@ +IBM OPAL real-time clock +------------------------ + +Required properties: +- comapatible: Should be "ibm,opal-rtc" + +Optional properties: +- has-tpo: Decides if the wakeup is supported or not. + +Example: + rtc { + compatible = "ibm,opal-rtc"; + has-tpo; + phandle = <0x10000029>; + linux,phandle = <0x10000029>; + }; diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 5d073e50cac8..60250e2d1f0d 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -154,6 +154,8 @@ struct opal_sg_list { #define OPAL_HANDLE_HMI 98 #define OPAL_REGISTER_DUMP_REGION 101 #define OPAL_UNREGISTER_DUMP_REGION 102 +#define OPAL_WRITE_TPO 103 +#define OPAL_READ_TPO 104 #define OPAL_IPMI_SEND 107 #define OPAL_IPMI_RECV 108 @@ -832,6 +834,9 @@ int64_t opal_rtc_read(__be32 *year_month_day, __be64 *hour_minute_second_millisecond); int64_t opal_rtc_write(uint32_t year_month_day, uint64_t hour_minute_second_millisecond); +int64_t opal_tpo_read(uint64_t token, __be32 *year_mon_day, __be32 *hour_min); +int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day, + uint32_t hour_min); int64_t opal_cec_power_down(uint64_t request); int64_t opal_cec_reboot(void); int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset); @@ -1009,8 +1014,6 @@ extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg); extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data); struct rtc_time; -extern int opal_set_rtc_time(struct rtc_time *tm); -extern void opal_get_rtc_time(struct rtc_time *tm); extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); extern void opal_flash_init(void); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9f8ea617ff2c..fa7c4f12104f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -989,6 +989,7 @@ void GregorianDay(struct rtc_time * tm) tm->tm_wday = day % 7; } +EXPORT_SYMBOL_GPL(GregorianDay); void to_tm(int tim, struct rtc_time * tm) { diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index e462ab947d16..693b6cdac691 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -71,6 +71,7 @@ int opal_async_get_token_interruptible(void) return token; } +EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible); int __opal_async_release_token(int token) { @@ -102,6 +103,7 @@ int opal_async_release_token(int token) return 0; } +EXPORT_SYMBOL_GPL(opal_async_release_token); int opal_async_wait_response(uint64_t token, struct opal_msg *msg) { @@ -120,6 +122,7 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg) return 0; } +EXPORT_SYMBOL_GPL(opal_async_wait_response); static int opal_async_comp_event(struct notifier_block *nb, unsigned long msg_type, void *msg) diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 499707ddaa9c..37dbee15769f 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -43,7 +45,7 @@ unsigned long __init opal_get_boot_time(void) long rc = OPAL_BUSY; if (!opal_check_token(OPAL_RTC_READ)) - goto out; + return 0; while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); @@ -53,62 +55,33 @@ unsigned long __init opal_get_boot_time(void) mdelay(10); } if (rc != OPAL_SUCCESS) - goto out; + return 0; y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); -out: - ppc_md.get_rtc_time = NULL; - ppc_md.set_rtc_time = NULL; - return 0; } -void opal_get_rtc_time(struct rtc_time *tm) +static __init int opal_time_init(void) { - long rc = OPAL_BUSY; - u32 y_m_d; - u64 h_m_s_ms; - __be32 __y_m_d; - __be64 __h_m_s_ms; + struct platform_device *pdev; + struct device_node *rtc; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); + rtc = of_find_node_by_path("/ibm,opal/rtc"); + if (rtc) { + pdev = of_platform_device_create(rtc, "opal-rtc", NULL); + of_node_put(rtc); + } else { + if (opal_check_token(OPAL_RTC_READ) || + opal_check_token(OPAL_READ_TPO)) + pdev = platform_device_register_simple("opal-rtc", -1, + NULL, 0); else - mdelay(10); + return -ENODEV; } - if (rc != OPAL_SUCCESS) - return; - y_m_d = be32_to_cpu(__y_m_d); - h_m_s_ms = be64_to_cpu(__h_m_s_ms); - opal_to_tm(y_m_d, h_m_s_ms, tm); -} - -int opal_set_rtc_time(struct rtc_time *tm) -{ - long rc = OPAL_BUSY; - u32 y_m_d = 0; - u64 h_m_s_ms = 0; - - y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24; - y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16; - y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8; - y_m_d |= ((u32)bin2bcd(tm->tm_mday)); - - h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56; - h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48; - h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40; - - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_write(y_m_d, h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); - else - mdelay(10); - } - return rc == OPAL_SUCCESS ? 0 : -EIO; + + return PTR_ERR_OR_ZERO(pdev); } +machine_subsys_initcall(powernv, opal_time_init); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 59978af9babe..0a299be588af 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -250,5 +250,7 @@ OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); +OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); +OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 0297702e8ae9..a1c37f9d60d2 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -819,3 +819,9 @@ void opal_free_sg_list(struct opal_sg_list *sg) sg = NULL; } } + +EXPORT_SYMBOL_GPL(opal_poll_events); +EXPORT_SYMBOL_GPL(opal_rtc_read); +EXPORT_SYMBOL_GPL(opal_rtc_write); +EXPORT_SYMBOL_GPL(opal_tpo_read); +EXPORT_SYMBOL_GPL(opal_tpo_write); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 941831d67cb2..30b1c3e298a6 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -265,8 +265,6 @@ static unsigned long pnv_memory_block_size(void) static void __init pnv_setup_machdep_opal(void) { ppc_md.get_boot_time = opal_get_boot_time; - ppc_md.get_rtc_time = opal_get_rtc_time; - ppc_md.set_rtc_time = opal_set_rtc_time; ppc_md.restart = pnv_restart; pm_power_off = pnv_power_off; ppc_md.halt = pnv_halt; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6dd12ddbabc6..c8f0ec7464ce 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -979,6 +979,17 @@ config RTC_DRV_NUC900 If you say yes here you get support for the RTC subsystem of the NUC910/NUC920 used in embedded systems. +config RTC_DRV_OPAL + tristate "IBM OPAL RTC driver" + depends on PPC_POWERNV + default y + help + If you say yes here you get support for the PowerNV platform RTC + driver based on OPAL interfaces. + + This driver can also be built as a module. If so, the module + will be called rtc-opal. + comment "on-CPU RTC drivers" config RTC_DRV_DAVINCI diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index b188323c096a..c8ef3e1e6ccd 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_RTC_DRV_MSM6242) += rtc-msm6242.o obj-$(CONFIG_RTC_DRV_MPC5121) += rtc-mpc5121.o obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o obj-$(CONFIG_RTC_DRV_NUC900) += rtc-nuc900.o +obj-$(CONFIG_RTC_DRV_OPAL) += rtc-opal.o obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o obj-$(CONFIG_RTC_DRV_PALMAS) += rtc-palmas.o obj-$(CONFIG_RTC_DRV_PCAP) += rtc-pcap.o diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c new file mode 100644 index 000000000000..95f652165fe9 --- /dev/null +++ b/drivers/rtc/rtc-opal.c @@ -0,0 +1,261 @@ +/* + * IBM OPAL RTC driver + * Copyright (C) 2014 IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + */ + +#define DRVNAME "rtc-opal" +#define pr_fmt(fmt) DRVNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) +{ + tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) + + bcd2bin((y_m_d >> 16) & 0xff)) - 1900; + tm->tm_mon = bcd2bin((y_m_d >> 8) & 0xff) - 1; + tm->tm_mday = bcd2bin(y_m_d & 0xff); + tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff); + tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff); + tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff); + + GregorianDay(tm); +} + +static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms) +{ + *y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24; + *y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16; + *y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8; + *y_m_d |= ((u32)bin2bcd(tm->tm_mday)); + + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56; + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48; + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40; +} + +static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) +{ + long rc = OPAL_BUSY; + u32 y_m_d; + u64 h_m_s_ms; + __be32 __y_m_d; + __be64 __h_m_s_ms; + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + msleep(10); + } + + if (rc != OPAL_SUCCESS) + return -EIO; + + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = be64_to_cpu(__h_m_s_ms); + opal_to_tm(y_m_d, h_m_s_ms, tm); + + return 0; +} + +static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) +{ + long rc = OPAL_BUSY; + u32 y_m_d = 0; + u64 h_m_s_ms = 0; + + tm_to_opal(tm, &y_m_d, &h_m_s_ms); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_rtc_write(y_m_d, h_m_s_ms); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + msleep(10); + } + + return rc == OPAL_SUCCESS ? 0 : -EIO; +} + +/* + * TPO Timed Power-On + * + * TPO get/set OPAL calls care about the hour and min and to make it consistent + * with the rtc utility time conversion functions, we use the 'u64' to store + * its value and perform bit shift by 32 before use.. + */ +static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) +{ + __be32 __y_m_d, __h_m; + struct opal_msg msg; + int rc, token; + u64 h_m_s_ms; + u32 y_m_d; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("Failed to get the async token\n"); + + return token; + } + + rc = opal_tpo_read(token, &__y_m_d, &__h_m); + if (rc != OPAL_ASYNC_COMPLETION) { + rc = -EIO; + goto exit; + } + + rc = opal_async_wait_response(token, &msg); + if (rc) { + rc = -EIO; + goto exit; + } + + rc = be64_to_cpu(msg.params[1]); + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto exit; + } + + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = ((u64)be32_to_cpu(__h_m) << 32); + opal_to_tm(y_m_d, h_m_s_ms, &alarm->time); + +exit: + opal_async_release_token(token); + return rc; +} + +/* Set Timed Power-On */ +static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) +{ + u64 h_m_s_ms = 0, token; + struct opal_msg msg; + u32 y_m_d = 0; + int rc; + + tm_to_opal(&alarm->time, &y_m_d, &h_m_s_ms); + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("Failed to get the async token\n"); + + return token; + } + + /* TPO, we care about hour and minute */ + rc = opal_tpo_write(token, y_m_d, + (u32)((h_m_s_ms >> 32) & 0xffff0000)); + if (rc != OPAL_ASYNC_COMPLETION) { + rc = -EIO; + goto exit; + } + + rc = opal_async_wait_response(token, &msg); + if (rc) { + rc = -EIO; + goto exit; + } + + rc = be64_to_cpu(msg.params[1]); + if (rc != OPAL_SUCCESS) + rc = -EIO; + +exit: + opal_async_release_token(token); + return rc; +} + +static const struct rtc_class_ops opal_rtc_ops = { + .read_time = opal_get_rtc_time, + .set_time = opal_set_rtc_time, + .read_alarm = opal_get_tpo_time, + .set_alarm = opal_set_tpo_time, +}; + +static int opal_rtc_probe(struct platform_device *pdev) +{ + struct rtc_device *rtc; + + if (pdev->dev.of_node && of_get_property(pdev->dev.of_node, "has-tpo", + NULL)) + device_set_wakeup_capable(&pdev->dev, true); + + rtc = devm_rtc_device_register(&pdev->dev, DRVNAME, &opal_rtc_ops, + THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + rtc->uie_unsupported = 1; + + return 0; +} + +static const struct of_device_id opal_rtc_match[] = { + { + .compatible = "ibm,opal-rtc", + }, + { } +}; +MODULE_DEVICE_TABLE(of, opal_rtc_match); + +static const struct platform_device_id opal_rtc_driver_ids[] = { + { + .name = "opal-rtc", + }, + { } +}; +MODULE_DEVICE_TABLE(platform, opal_rtc_driver_ids); + +static struct platform_driver opal_rtc_driver = { + .probe = opal_rtc_probe, + .id_table = opal_rtc_driver_ids, + .driver = { + .name = DRVNAME, + .owner = THIS_MODULE, + .of_match_table = opal_rtc_match, + }, +}; + +static int __init opal_rtc_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return -ENODEV; + + return platform_driver_register(&opal_rtc_driver); +} + +static void __exit opal_rtc_exit(void) +{ + platform_driver_unregister(&opal_rtc_driver); +} + +MODULE_AUTHOR("Neelesh Gupta "); +MODULE_DESCRIPTION("IBM OPAL RTC driver"); +MODULE_LICENSE("GPL"); + +module_init(opal_rtc_init); +module_exit(opal_rtc_exit); From 76f3e2929bb6b476fb02b519ad953e2e29ee7bd5 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Fri, 31 Jan 2014 15:10:14 +0530 Subject: [PATCH 081/101] powerpc/config: Enable memory driver As Freescale IFC controller has been moved to driver to driver/memory. So enable memory driver in powerpc config Signed-off-by: Prabhakar Kushwaha Signed-off-by: Scott Wood --- arch/powerpc/configs/corenet32_smp_defconfig | 1 + arch/powerpc/configs/corenet64_smp_defconfig | 1 + arch/powerpc/configs/mpc85xx_defconfig | 1 + arch/powerpc/configs/mpc85xx_smp_defconfig | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 688e9e4d29a1..611efe99faeb 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -144,6 +144,7 @@ CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y CONFIG_UIO=y CONFIG_STAGING=y +CONFIG_MEMORY=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y CONFIG_EXT2_FS=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 6db97e4414b2..be24a18c0d96 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -118,6 +118,7 @@ CONFIG_FSL_DMA=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y CONFIG_FSL_CORENET_CF=y +CONFIG_MEMORY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index d2c415489f72..02395fab19bd 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -215,6 +215,7 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y # CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 87460083dbc7..b5d1b82a1b43 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -216,6 +216,7 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y # CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y From bc78b05bb412fad135715551fc536ca511a3cff2 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Fri, 14 Nov 2014 17:37:50 +1100 Subject: [PATCH 082/101] cxl: Return error to PSL if IRQ demultiplexing fails & print clearer warning If an AFU has a hardware bug that causes it to acknowledge a context terminate or remove while that context has outstanding transactions, it is possible for the kernel to receive an interrupt for that context after we have removed it from the context list. The kernel will not be able to demultiplex the interrupt (or worse - if we have already reallocated the process handle we could mis-attribute it to the new context), and printed a big scary warning. It did not acknowledge the interrupt, which would effectively halt further translation fault processing on the PSL. This patch makes the warning clearer about the likely cause of the issue (i.e. hardware bug) to make it obvious to future AFU designers of what needs to be fixed. It also prints out the process handle which can then be matched up with hardware and software traces for debugging. It also acknowledges the interrupt to the PSL with either an address error or acknowledge, so that the PSL can continue with other translations. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxl.h | 2 +- drivers/misc/cxl/irq.c | 46 ++++++++++++++++++++++++--------------- drivers/misc/cxl/native.c | 14 ++++++------ 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 3d2b8677ec8a..64a4aa3a5c5d 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -612,7 +612,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr); int cxl_detach_process(struct cxl_context *ctx); -int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info); +int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info); int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); int cxl_check_error(struct cxl_afu *afu); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 336020c8e1af..35fcb3d43dc0 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -92,20 +92,13 @@ static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 da return IRQ_HANDLED; } -static irqreturn_t cxl_irq(int irq, void *data) +static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) { struct cxl_context *ctx = data; - struct cxl_irq_info irq_info; u64 dsisr, dar; - int result; - if ((result = cxl_get_irq(ctx, &irq_info))) { - WARN(1, "Unable to get CXL IRQ Info: %i\n", result); - return IRQ_HANDLED; - } - - dsisr = irq_info.dsisr; - dar = irq_info.dar; + dsisr = irq_info->dsisr; + dar = irq_info->dar; pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); @@ -149,9 +142,9 @@ static irqreturn_t cxl_irq(int irq, void *data) if (dsisr & CXL_PSL_DSISR_An_UR) pr_devel("CXL interrupt: AURP PTE not found\n"); if (dsisr & CXL_PSL_DSISR_An_PE) - return handle_psl_slice_error(ctx, dsisr, irq_info.errstat); + return handle_psl_slice_error(ctx, dsisr, irq_info->errstat); if (dsisr & CXL_PSL_DSISR_An_AE) { - pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info.afu_err); + pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info->afu_err); if (ctx->pending_afu_err) { /* @@ -163,10 +156,10 @@ static irqreturn_t cxl_irq(int irq, void *data) */ dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " "undelivered to pe %i: %.llx\n", - ctx->pe, irq_info.afu_err); + ctx->pe, irq_info->afu_err); } else { spin_lock(&ctx->lock); - ctx->afu_err = irq_info.afu_err; + ctx->afu_err = irq_info->afu_err; ctx->pending_afu_err = 1; spin_unlock(&ctx->lock); @@ -182,24 +175,43 @@ static irqreturn_t cxl_irq(int irq, void *data) return IRQ_HANDLED; } +static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info) +{ + if (irq_info->dsisr & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + + return IRQ_HANDLED; +} + static irqreturn_t cxl_irq_multiplexed(int irq, void *data) { struct cxl_afu *afu = data; struct cxl_context *ctx; + struct cxl_irq_info irq_info; int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; int ret; + if ((ret = cxl_get_irq(afu, &irq_info))) { + WARN(1, "Unable to get CXL IRQ Info: %i\n", ret); + return fail_psl_irq(afu, &irq_info); + } + rcu_read_lock(); ctx = idr_find(&afu->contexts_idr, ph); if (ctx) { - ret = cxl_irq(irq, ctx); + ret = cxl_irq(irq, ctx, &irq_info); rcu_read_unlock(); return ret; } rcu_read_unlock(); - WARN(1, "Unable to demultiplex CXL PSL IRQ\n"); - return IRQ_HANDLED; + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR" + " %.16llx\n(Possible AFU HW issue - was a term/remove acked" + " with outstanding transactions?)\n", ph, irq_info.dsisr, + irq_info.dar); + return fail_psl_irq(afu, &irq_info); } static irqreturn_t cxl_irq_afu(int irq, void *data) diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index d47532e8f4f1..9a5a442269a8 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -637,18 +637,18 @@ int cxl_detach_process(struct cxl_context *ctx) return detach_process_native_afu_directed(ctx); } -int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info) +int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; - info->dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An); - info->dar = cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An); - info->dsr = cxl_p2n_read(ctx->afu, CXL_PSL_DSR_An); - pidtid = cxl_p2n_read(ctx->afu, CXL_PSL_PID_TID_An); + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); + pidtid = cxl_p2n_read(afu, CXL_PSL_PID_TID_An); info->pid = pidtid >> 32; info->tid = pidtid & 0xffffffff; - info->afu_err = cxl_p2n_read(ctx->afu, CXL_AFU_ERR_An); - info->errstat = cxl_p2n_read(ctx->afu, CXL_PSL_ErrStat_An); + info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); + info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); return 0; } From 80fa93fce37d3490f4bb0da8a5b239a6745bc744 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 14 Nov 2014 18:09:28 +1100 Subject: [PATCH 083/101] cxl: Name interrupts in /proc/interrupt Currently all interrupts generated by cxl are named "cxl". This is not very informative as we can't distinguish between cards, AFUs, error interrupts, user contexts and user interrupts numbers. Being able to distinguish them is useful for setting affinity. This patch gives each of these names in /proc/interrupts. A two card CAPI system, with afu0.0 having 2 active contexts each with 4 user IRQs each, will now look like this: % grep cxl /proc/interrupts 444: 0 OPAL ICS 141312 Level cxl-card1-err 445: 0 OPAL ICS 141313 Level cxl-afu1.0-err 446: 0 OPAL ICS 141314 Level cxl-afu1.0 462: 0 OPAL ICS 2052 Level cxl-afu0.0-pe0-1 463: 75517 OPAL ICS 2053 Level cxl-afu0.0-pe0-2 468: 0 OPAL ICS 2054 Level cxl-afu0.0-pe0-3 469: 0 OPAL ICS 2055 Level cxl-afu0.0-pe0-4 470: 0 OPAL ICS 2056 Level cxl-afu0.0-pe1-1 471: 75506 OPAL ICS 2057 Level cxl-afu0.0-pe1-2 472: 0 OPAL ICS 2058 Level cxl-afu0.0-pe1-3 473: 0 OPAL ICS 2059 Level cxl-afu0.0-pe1-4 502: 1066 OPAL ICS 2050 Level cxl-afu0.0 514: 0 OPAL ICS 2048 Level cxl-card0-err 515: 0 OPAL ICS 2049 Level cxl-afu0.0-err Signed-off-by: Michael Neuling Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxl.h | 13 ++++-- drivers/misc/cxl/irq.c | 98 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 64a4aa3a5c5d..b5b6bda44a00 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -336,6 +336,8 @@ struct cxl_sste { struct cxl_afu { irq_hw_number_t psl_hwirq; irq_hw_number_t serr_hwirq; + char *err_irq_name; + char *psl_irq_name; unsigned int serr_virq; void __iomem *p1n_mmio; void __iomem *p2n_mmio; @@ -379,6 +381,12 @@ struct cxl_afu { bool enabled; }; + +struct cxl_irq_name { + struct list_head list; + char *name; +}; + /* * This is a cxl context. If the PSL is in dedicated mode, there will be one * of these per AFU. If in AFU directed there can be lots of these. @@ -403,6 +411,7 @@ struct cxl_context { unsigned long *irq_bitmap; /* Accessed from IRQ context */ struct cxl_irq_ranges irqs; + struct list_head irq_names; u64 fault_addr; u64 fault_dsisr; u64 afu_err; @@ -444,6 +453,7 @@ struct cxl { struct dentry *trace; struct dentry *psl_err_chk; struct dentry *debugfs; + char *irq_name; struct bin_attribute cxl_attr; int adapter_num; int user_irqs; @@ -563,9 +573,6 @@ int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode); int cxl_afu_deactivate_mode(struct cxl_afu *afu); int cxl_afu_select_best_mode(struct cxl_afu *afu); -unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, - irq_handler_t handler, void *cookie); -void cxl_unmap_irq(unsigned int virq, void *cookie); int cxl_register_psl_irq(struct cxl_afu *afu); void cxl_release_psl_irq(struct cxl_afu *afu); int cxl_register_psl_err_irq(struct cxl *adapter); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 35fcb3d43dc0..c294925f73ee 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -255,7 +255,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data) } unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, - irq_handler_t handler, void *cookie) + irq_handler_t handler, void *cookie, const char *name) { unsigned int virq; int result; @@ -271,7 +271,7 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); - result = request_irq(virq, handler, 0, "cxl", cookie); + result = request_irq(virq, handler, 0, name, cookie); if (result) { dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result); return 0; @@ -290,14 +290,15 @@ static int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler, void *cookie, irq_hw_number_t *dest_hwirq, - unsigned int *dest_virq) + unsigned int *dest_virq, + const char *name) { int hwirq, virq; if ((hwirq = cxl_alloc_one_irq(adapter)) < 0) return hwirq; - if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie))) + if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name))) goto err; *dest_hwirq = hwirq; @@ -314,10 +315,19 @@ int cxl_register_psl_err_irq(struct cxl *adapter) { int rc; + adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&adapter->dev)); + if (!adapter->irq_name) + return -ENOMEM; + if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter, &adapter->err_hwirq, - &adapter->err_virq))) + &adapter->err_virq, + adapter->irq_name))) { + kfree(adapter->irq_name); + adapter->irq_name = NULL; return rc; + } cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff); @@ -329,6 +339,7 @@ void cxl_release_psl_err_irq(struct cxl *adapter) cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); cxl_unmap_irq(adapter->err_virq, adapter); cxl_release_one_irq(adapter, adapter->err_hwirq); + kfree(adapter->irq_name); } int cxl_register_serr_irq(struct cxl_afu *afu) @@ -336,10 +347,18 @@ int cxl_register_serr_irq(struct cxl_afu *afu) u64 serr; int rc; + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu, &afu->serr_hwirq, - &afu->serr_virq))) + &afu->serr_virq, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; return rc; + } serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); @@ -353,24 +372,50 @@ void cxl_release_serr_irq(struct cxl_afu *afu) cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); cxl_unmap_irq(afu->serr_virq, afu); cxl_release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); } int cxl_register_psl_irq(struct cxl_afu *afu) { - return cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, - &afu->psl_hwirq, &afu->psl_virq); + int rc; + + afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", + dev_name(&afu->dev)); + if (!afu->psl_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, + &afu->psl_hwirq, &afu->psl_virq, + afu->psl_irq_name))) { + kfree(afu->psl_irq_name); + afu->psl_irq_name = NULL; + } + return rc; } void cxl_release_psl_irq(struct cxl_afu *afu) { cxl_unmap_irq(afu->psl_virq, afu); cxl_release_one_irq(afu->adapter, afu->psl_hwirq); + kfree(afu->psl_irq_name); +} + +void afu_irq_name_free(struct cxl_context *ctx) +{ + struct cxl_irq_name *irq_name, *tmp; + + list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) { + kfree(irq_name->name); + list_del(&irq_name->list); + kfree(irq_name); + } } int afu_register_irqs(struct cxl_context *ctx, u32 count) { irq_hw_number_t hwirq; - int rc, r, i; + int rc, r, i, j = 1; + struct cxl_irq_name *irq_name; if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) return rc; @@ -384,15 +429,47 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) sizeof(*ctx->irq_bitmap), GFP_KERNEL); if (!ctx->irq_bitmap) return -ENOMEM; + + /* + * Allocate names first. If any fail, bail out before allocating + * actual hardware IRQs. + */ + INIT_LIST_HEAD(&ctx->irq_names); + for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + irq_name = kmalloc(sizeof(struct cxl_irq_name), + GFP_KERNEL); + if (!irq_name) + goto out; + irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i", + dev_name(&ctx->afu->dev), + ctx->pe, j); + if (!irq_name->name) { + kfree(irq_name); + goto out; + } + /* Add to tail so next look get the correct order */ + list_add_tail(&irq_name->list, &ctx->irq_names); + j++; + } + } + + /* We've allocated all memory now, so let's do the irq allocations */ + irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); for (r = 1; r < CXL_IRQ_RANGES; r++) { hwirq = ctx->irqs.offset[r]; for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { cxl_map_irq(ctx->afu->adapter, hwirq, - cxl_irq_afu, ctx); + cxl_irq_afu, ctx, irq_name->name); + irq_name = list_next_entry(irq_name, list); } } return 0; + +out: + afu_irq_name_free(ctx); + return -ENOMEM; } void afu_release_irqs(struct cxl_context *ctx) @@ -410,5 +487,6 @@ void afu_release_irqs(struct cxl_context *ctx) } } + afu_irq_name_free(ctx); cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); } From a49ab6eeebe5624d51466969a7bac611231eede8 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 17 Nov 2014 10:52:30 +0800 Subject: [PATCH 084/101] powerpc/pseries: Initialise nvram_pstore_info's buf_lock nvram_pstore_info's buf_lock is not initialized before registering, which is clearly incorrect. It causes some strange behavior when trying to obtain the lock during kdump process. On a UP configuration, the console stopped for a couple of seconds, then "lockup suspected" warning printed out, but then it continued to run. So try lock fails, and lockup reported, but then arch_spin_lock() passes. Signed-off-by: Li Zhong [mpe: Edited changelog] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/nvram.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 11a3b617ef5d..054a0ed5c7ee 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -715,6 +715,8 @@ static int nvram_pstore_init(void) nvram_pstore_info.buf = oops_data; nvram_pstore_info.bufsize = oops_data_sz; + spin_lock_init(&nvram_pstore_info.buf_lock); + rc = pstore_register(&nvram_pstore_info); if (rc != 0) pr_err("nvram: pstore_register() failed, defaults to " From e39f223fc93580c86ccf6b3422033e349f57f0dd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 18 Nov 2014 16:47:35 +1100 Subject: [PATCH 085/101] powerpc: Remove more traces of bootmem Although we are now selecting NO_BOOTMEM, we still have some traces of bootmem lying around. That is because even with NO_BOOTMEM there is still a shim that converts bootmem calls into memblock calls, but ultimately we want to remove all traces of bootmem. Most of the patch is conversions from alloc_bootmem() to memblock_virt_alloc(). In general a call such as: p = (struct foo *)alloc_bootmem(x); Becomes: p = memblock_virt_alloc(x, 0); We don't need the cast because memblock_virt_alloc() returns a void *. The alignment value of zero tells memblock to use the default alignment, which is SMP_CACHE_BYTES, the same value alloc_bootmem() uses. We remove a number of NULL checks on the result of memblock_virt_alloc(). That is because memblock_virt_alloc() will panic if it can't allocate, in exactly the same way as alloc_bootmem(), so the NULL checks are and always have been redundant. The memory returned by memblock_virt_alloc() is already zeroed, so we remove several memsets of the result of memblock_virt_alloc(). Finally we convert a few uses of __alloc_bootmem(x, y, MAX_DMA_ADDRESS) to just plain memblock_virt_alloc(). We don't use memblock_alloc_base() because MAX_DMA_ADDRESS is ~0ul on powerpc, so limiting the allocation to that is pointless, 16XB ought to be enough for anyone. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 1 - arch/powerpc/kernel/pci_32.c | 4 +--- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/lib/alloc.c | 4 +--- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/mmu_context_nohash.c | 8 ++++---- arch/powerpc/platforms/cell/celleb_pci.c | 6 +++--- arch/powerpc/platforms/powermac/nvram.c | 6 +----- arch/powerpc/platforms/powernv/pci-ioda.c | 12 +++--------- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 16 ++++------------ arch/powerpc/platforms/ps3/setup.c | 7 +------ arch/powerpc/sysdev/fsl_pci.c | 1 - 12 files changed, 20 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index bc2dab52a991..37d512d35943 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 432459c817fa..1f7930037cb7 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -199,9 +199,7 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256); - if (!of_prop) - return; + of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6e5310ddf8c7..49f553bbb360 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -660,7 +660,7 @@ static void __init emergency_stack_init(void) } /* - * Called into from start_kernel this initializes bootmem, which is used + * Called into from start_kernel this initializes memblock, which is used * to manage page allocation until mem_init is called. */ void __init setup_arch(char **cmdline_p) diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index da22c84a8fed..4a6c2cf890d9 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -13,9 +13,7 @@ void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) if (mem_init_done) p = kzalloc(size, mask); else { - p = alloc_bootmem(size); - if (p) - memset(p, 0, size); + p = memblock_virt_alloc(size, 0); } return p; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index af56de82375d..8c9b8115867c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -315,7 +315,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) * If gpages can be in highmem we can't use the trick of storing the * data structure in the page; allocate space for this */ - m = alloc_bootmem(sizeof(struct huge_bootmem_page)); + m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0); m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; #else m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 928ebe79668b..9cba6cba2e50 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -421,12 +421,12 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = alloc_bootmem(CTX_MAP_SIZE); - context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); + context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0); + context_mm = memblock_virt_alloc(sizeof(void *) * (last_context + 1), 0); #ifndef CONFIG_SMP - stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0); #else - stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); register_cpu_notifier(&mmu_context_cpu_nb); #endif diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 2b98a36ef8fb..3ce70ded2d6a 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -401,11 +401,11 @@ error: } else { if (config && *config) { size = 256; - free_bootmem(__pa(*config), size); + memblock_free(__pa(*config), size); } if (res && *res) { size = sizeof(struct celleb_pci_resource); - free_bootmem(__pa(*res), size); + memblock_free(__pa(*res), size); } } diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 014d06e6d46b..60b03a1703d1 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -513,11 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) printk(KERN_ERR "nvram: no address\n"); return -EINVAL; } - nvram_image = alloc_bootmem(NVRAM_SIZE); - if (nvram_image == NULL) { - printk(KERN_ERR "nvram: can't allocate ram image\n"); - return -ENOMEM; - } + nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0); nvram_data = ioremap(addr, NVRAM_SIZE*2); nvram_naddrs = 1; /* Make sure we get the correct case */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d03503515692..cc861c14840d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1940,19 +1940,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (!phb) { - pr_err(" Out of memory !\n"); - return; - } + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); /* Allocate PCI controller */ - memset(phb, 0, sizeof(struct pnv_phb)); phb->hose = hose = pcibios_alloc_controller(np); if (!phb->hose) { pr_err(" Can't allocate PCI controller for %s\n", np->full_name); - free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); + memblock_free(__pa(phb), sizeof(struct pnv_phb)); return; } @@ -2019,8 +2014,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); - aux = alloc_bootmem(size); - memset(aux, 0, size); + aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; if (phb->type == PNV_PHB_IODA1) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 3336fcbdd08a..6ef6d4d8e7e2 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -122,12 +122,9 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, return; } - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (phb) { - memset(phb, 0, sizeof(struct pnv_phb)); - phb->hose = pcibios_alloc_controller(np); - } - if (!phb || !phb->hose) { + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + phb->hose = pcibios_alloc_controller(np); + if (!phb->hose) { pr_err(" Failed to allocate PCI controller\n"); return; } @@ -216,12 +213,7 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) * * XXX TODO: Make it chip local if possible */ - tce_mem = __alloc_bootmem(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY, - __pa(MAX_DMA_ADDRESS)); - if (!tce_mem) { - pr_err(" Failed to allocate TCE Memory !\n"); - return; - } + tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); pr_debug(" TCE : 0x%016lx..0x%016lx\n", __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 009a2004b876..799c8580ab09 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -125,12 +125,7 @@ static void __init prealloc(struct ps3_prealloc *p) if (!p->size) return; - p->address = __alloc_bootmem(p->size, p->align, __pa(MAX_DMA_ADDRESS)); - if (!p->address) { - printk(KERN_ERR "%s: Cannot allocate %s\n", __func__, - p->name); - return; - } + p->address = memblock_virt_alloc(p->size, p->align); printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, p->address); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index d8484d7cffaa..6455c1eada1a 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include From b85743ee95f3593ded82bca55cd3c38090ad001f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 14 Nov 2014 10:47:28 +1100 Subject: [PATCH 086/101] powerpc/eeh: Refactor eeh_reset_pe() The patch refactors eeh_reset_pe() in order for: * Varied return values for different failure cases. * Replace pr_err() with pr_warn() and print function name. * Coding style cleanup. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 2248a1999c64..967e4a08d824 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -758,30 +758,37 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) int eeh_reset_pe(struct eeh_pe *pe) { int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - int i, rc; + int i, state, ret; /* Take three shots at resetting the bus */ - for (i=0; i<3; i++) { + for (i = 0; i < 3; i++) { eeh_reset_pe_once(pe); /* * EEH_PE_ISOLATED is expected to be removed after * BAR restore. */ - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((rc & flags) == flags) - return 0; - - if (rc < 0) { - pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", - __func__, pe->phb->global_number, pe->addr); - return -1; + state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if ((state & flags) == flags) { + ret = 0; + goto out; } - pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", - i+1, pe->phb->global_number, pe->addr, rc); + + if (state < 0) { + pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + __func__, pe->phb->global_number, pe->addr); + ret = -ENOTRECOVERABLE; + goto out; + } + + /* We might run out of credits */ + ret = -EIO; + pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", + __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } - return -1; +out: + return ret; } /** From 28bf36f92afc6b22ba50ceaf36ba89afa9f5c1e8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 14 Nov 2014 10:47:29 +1100 Subject: [PATCH 087/101] powerpc/eeh: Set EEH_PE_RESET on PE reset The patch introduces additional flag EEH_PE_RESET to indicate the corresponding PE is under reset. In turn, the PE retrieval bakcend on PowerNV platform can return unfrozen state for the EEH core to moving forward. Flag EEH_PE_CFG_BLOCKED isn't the correct one for the purpose. In PCI passthrou case, the problem is more worse: Guest doesn't recover 6th EEH error. The PE is left in isolated (frozen) and config blocked state on Broadcom adapters. We can't retrieve the PE's state correctly any more, even from the host side via sysfs /sys/bus/pci/devices/xxx/eeh_pe_state. Reported-by: Rajeshkumar Subramanian Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 4 ++++ arch/powerpc/kernel/eeh_driver.c | 10 ++-------- arch/powerpc/platforms/powernv/eeh-ioda.c | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index ca07f9c27335..2e633b41712a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -72,6 +72,7 @@ struct device_node; #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ #define EEH_PE_CFG_BLOCKED (1 << 2) /* Block config access */ +#define EEH_PE_RESET (1 << 3) /* PE reset in progress */ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 967e4a08d824..b372bfdbfb3a 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -760,6 +760,9 @@ int eeh_reset_pe(struct eeh_pe *pe) int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); int i, state, ret; + /* Mark as reset and block config space */ + eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + /* Take three shots at resetting the bus */ for (i = 0; i < 3; i++) { eeh_reset_pe_once(pe); @@ -788,6 +791,7 @@ int eeh_reset_pe(struct eeh_pe *pe) } out: + eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); return ret; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 6535936bdf27..b17e793ba67e 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -528,13 +528,11 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Issue reset */ - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); ret = eeh_reset_pe(pe); if (ret) { - eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); return ret; } - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Unfreeze the PE */ ret = eeh_clear_pe_frozen_state(pe, true); @@ -601,19 +599,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); rc = eeh_reset_pe(pe); - if (rc) { - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + if (rc) return rc; - } pci_lock_rescan_remove(); /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index db3803e21483..fb38fe4dba89 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -372,7 +372,7 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) * moving forward, we have to return operational * state during PE reset. */ - if (pe->state & EEH_PE_CFG_BLOCKED) { + if (pe->state & EEH_PE_RESET) { result = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE | EEH_STATE_MMIO_ENABLED | From b1d76a7d57762332cd8e0c020470d43c5ad3948e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 14 Nov 2014 10:47:30 +1100 Subject: [PATCH 088/101] powerpc/eeh: Recover EEH error on ownership change for BCM5719 In PCI passthrou scenario, we need simulate EEH recovery for Emulex adapters when their ownership changes, as we did in commit 5cfb20b96 ("powerpc/eeh: Emulate EEH recovery for VFIO devices"). Broadcom BCM5719 adpaters are facing same problem and needs same cure. Reported-by: Rajeshkumar Subramanian Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index b372bfdbfb3a..f1c6b115cb37 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1220,6 +1220,7 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) static struct pci_device_id eeh_reset_ids[] = { { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ + { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ { 0 } }; From a450e8f55a57d049ac3afe218f06567e12d6b4f5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Sat, 22 Nov 2014 21:58:09 +1100 Subject: [PATCH 089/101] powerpc/eeh: Dump PHB diag-data early On PowerNV platform, PHB diag-data is dumped after stopping device drivers. In case of recursive EEH errors, the kernel is usually crashed before dumping PHB diag-data for the second EEH error. It's hard to locate the root cause of the second EEH error without PHB diag-data. The patch adds one more EEH option "eeh=early_log", which helps dumping PHB diag-data immediately once frozen PE is detected, in order to get the PHB diag-data for the second EEH error. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 2 ++ arch/powerpc/platforms/powernv/eeh-ioda.c | 13 ++++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 2e633b41712a..0652ebe117af 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -39,6 +39,7 @@ struct device_node; #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ #define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */ +#define EEH_EARLY_DUMP_LOG 0x20 /* Dump log immediately */ /* * Delay for PE reset, all in ms diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f1c6b115cb37..05be77d9ea0e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -143,6 +143,8 @@ static int __init eeh_setup(char *str) { if (!strcmp(str, "off")) eeh_add_flag(EEH_FORCE_DISABLED); + else if (!strcmp(str, "early_log")) + eeh_add_flag(EEH_EARLY_DUMP_LOG); return 1; } diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index fb38fe4dba89..2809c9895288 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -353,6 +353,9 @@ static int ioda_eeh_get_phb_state(struct eeh_pe *pe) } else if (!(pe->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); } return result; @@ -451,6 +454,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) eeh_pe_state_mark(pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); } return result; @@ -730,7 +736,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) { - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); return 0; } @@ -1086,6 +1093,10 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) !((*pe)->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(*pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data((*pe)->phb, + (*pe)->data); } /* From 6d626c5ea3d8411cc2a72d7cabe70f01dfc32d1d Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 24 Nov 2014 21:59:26 +0530 Subject: [PATCH 090/101] powerpc/powernv: Cleanup unused MCE definitions/declarations. Cleanup OpalMCE_* definitions/declarations and other related code which is not used anymore. Signed-off-by: Mahesh Salgaonkar Acked-by: Benjamin Herrrenschmidt Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 104 -------------------------- arch/powerpc/include/asm/paca.h | 7 -- arch/powerpc/kernel/asm-offsets.c | 7 -- arch/powerpc/kernel/exceptions-64s.S | 17 ----- arch/powerpc/platforms/powernv/opal.c | 1 - 5 files changed, 136 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 60250e2d1f0d..5cd8d2fddba9 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -288,62 +288,6 @@ enum OpalMessageType { OPAL_MSG_TYPE_MAX, }; -/* Machine check related definitions */ -enum OpalMCE_Version { - OpalMCE_V1 = 1, -}; - -enum OpalMCE_Severity { - OpalMCE_SEV_NO_ERROR = 0, - OpalMCE_SEV_WARNING = 1, - OpalMCE_SEV_ERROR_SYNC = 2, - OpalMCE_SEV_FATAL = 3, -}; - -enum OpalMCE_Disposition { - OpalMCE_DISPOSITION_RECOVERED = 0, - OpalMCE_DISPOSITION_NOT_RECOVERED = 1, -}; - -enum OpalMCE_Initiator { - OpalMCE_INITIATOR_UNKNOWN = 0, - OpalMCE_INITIATOR_CPU = 1, -}; - -enum OpalMCE_ErrorType { - OpalMCE_ERROR_TYPE_UNKNOWN = 0, - OpalMCE_ERROR_TYPE_UE = 1, - OpalMCE_ERROR_TYPE_SLB = 2, - OpalMCE_ERROR_TYPE_ERAT = 3, - OpalMCE_ERROR_TYPE_TLB = 4, -}; - -enum OpalMCE_UeErrorType { - OpalMCE_UE_ERROR_INDETERMINATE = 0, - OpalMCE_UE_ERROR_IFETCH = 1, - OpalMCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2, - OpalMCE_UE_ERROR_LOAD_STORE = 3, - OpalMCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4, -}; - -enum OpalMCE_SlbErrorType { - OpalMCE_SLB_ERROR_INDETERMINATE = 0, - OpalMCE_SLB_ERROR_PARITY = 1, - OpalMCE_SLB_ERROR_MULTIHIT = 2, -}; - -enum OpalMCE_EratErrorType { - OpalMCE_ERAT_ERROR_INDETERMINATE = 0, - OpalMCE_ERAT_ERROR_PARITY = 1, - OpalMCE_ERAT_ERROR_MULTIHIT = 2, -}; - -enum OpalMCE_TlbErrorType { - OpalMCE_TLB_ERROR_INDETERMINATE = 0, - OpalMCE_TLB_ERROR_PARITY = 1, - OpalMCE_TLB_ERROR_MULTIHIT = 2, -}; - enum OpalThreadStatus { OPAL_THREAD_INACTIVE = 0x0, OPAL_THREAD_STARTED = 0x1, @@ -467,54 +411,6 @@ struct opal_ipmi_msg { uint8_t data[]; }; -struct opal_machine_check_event { - enum OpalMCE_Version version:8; /* 0x00 */ - uint8_t in_use; /* 0x01 */ - enum OpalMCE_Severity severity:8; /* 0x02 */ - enum OpalMCE_Initiator initiator:8; /* 0x03 */ - enum OpalMCE_ErrorType error_type:8; /* 0x04 */ - enum OpalMCE_Disposition disposition:8; /* 0x05 */ - uint8_t reserved_1[2]; /* 0x06 */ - uint64_t gpr3; /* 0x08 */ - uint64_t srr0; /* 0x10 */ - uint64_t srr1; /* 0x18 */ - union { /* 0x20 */ - struct { - enum OpalMCE_UeErrorType ue_error_type:8; - uint8_t effective_address_provided; - uint8_t physical_address_provided; - uint8_t reserved_1[5]; - uint64_t effective_address; - uint64_t physical_address; - uint8_t reserved_2[8]; - } ue_error; - - struct { - enum OpalMCE_SlbErrorType slb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } slb_error; - - struct { - enum OpalMCE_EratErrorType erat_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } erat_error; - - struct { - enum OpalMCE_TlbErrorType tlb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } tlb_error; - } u; -}; - /* FSP memory errors handling */ enum OpalMemErr_Version { OpalMemErr_V1 = 1, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index a5139ea6910b..24a386cbb928 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -42,7 +42,6 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ #define get_slb_shadow() (get_paca()->slb_shadow_ptr) struct task_struct; -struct opal_machine_check_event; /* * Defines the layout of the paca. @@ -153,12 +152,6 @@ struct paca_struct { u64 tm_scratch; /* TM scratch area for reclaim */ #endif -#ifdef CONFIG_PPC_POWERNV - /* Pointer to OPAL machine check event structure set by the - * early exception handler for use by high level C handler - */ - struct opal_machine_check_event *opal_mc_evt; -#endif #ifdef CONFIG_PPC_BOOK3S_64 /* Exclusive emergency stack pointer for machine check exception. */ void *mc_emergency_sp; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9d7dede2847c..c161ef3f28a1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -726,12 +726,5 @@ int main(void) arch.timing_last_enter.tv32.tbl)); #endif -#ifdef CONFIG_PPC_POWERNV - DEFINE(OPAL_MC_GPR3, offsetof(struct opal_machine_check_event, gpr3)); - DEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0)); - DEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1)); - DEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt)); -#endif - return 0; } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a1d45c161e24..ad62f4d6ce31 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1312,23 +1312,6 @@ hmi_exception_after_realmode: EXCEPTION_PROLOG_0(PACA_EXGEN) b hmi_exception_hv -#ifdef CONFIG_PPC_POWERNV -_GLOBAL(opal_mc_secondary_handler) - HMT_MEDIUM_PPR_DISCARD - SET_SCRATCH0(r13) - GET_PACA(r13) - clrldi r3,r3,2 - tovirt(r3,r3) - std r3,PACA_OPAL_MC_EVT(r13) - ld r13,OPAL_MC_SRR0(r3) - mtspr SPRN_SRR0,r13 - ld r13,OPAL_MC_SRR1(r3) - mtspr SPRN_SRR1,r13 - ld r3,OPAL_MC_GPR3(r3) - GET_SCRATCH0(r13) - b machine_check_pSeries -#endif /* CONFIG_PPC_POWERNV */ - #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index a1c37f9d60d2..cb0b6de79cd4 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -50,7 +50,6 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; static unsigned int opal_irq_count; static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); From 221195fb80daa1a0c2fd54a023081c416fe93340 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 25 Nov 2014 17:10:06 +0100 Subject: [PATCH 091/101] powerpc: Drop useless warning in eeh_init() This is what we get in dmesg when booting a pseries guest and the hypervisor doesn't provide EEH support. [ 0.166655] EEH functionality not supported [ 0.166778] eeh_init: Failed to call platform init function (-22) Since both powernv_eeh_init() and pseries_eeh_init() already complain when hitting an error, it is not needed to print more (especially such an uninformative message). Signed-off-by: Greg Kurz Acked-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 05be77d9ea0e..e1b6d8e17289 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -933,11 +933,8 @@ int eeh_init(void) pr_warn("%s: Platform EEH operation not found\n", __func__); return -EEXIST; - } else if ((ret = eeh_ops->init())) { - pr_warn("%s: Failed to call platform init function (%d)\n", - __func__, ret); + } else if ((ret = eeh_ops->init())) return ret; - } /* Initialize EEH event */ ret = eeh_event_init(); From 0ec2698fe1c451606c16e21dbfbd29efce694668 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 3 Nov 2014 20:21:34 +0530 Subject: [PATCH 092/101] powerpc/mm: Check for matching hpte without taking hpte lock With smaller hash page table config, we would end up in situation where we would be replacing hash page table slot frequently. In such config, we will find the hpte to be not matching, and we can do that check without holding the hpte lock. We need to recheck the hpte again after holding lock. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_native_64.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index d53288a08c37..558e50bac6f7 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -294,8 +294,6 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); - native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -308,16 +306,24 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> miss\n"); ret = -1; } else { - DBG_LOW(" -> hit\n"); - /* Update the HPTE */ - hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C))); + native_lock_hpte(hptep); + /* recheck with locks held */ + hpte_v = be64_to_cpu(hptep->v); + if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || + !(hpte_v & HPTE_V_VALID))) { + ret = -1; + } else { + DBG_LOW(" -> hit\n"); + /* Update the HPTE */ + hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & + ~(HPTE_R_PP | HPTE_R_N)) | + (newpp & (HPTE_R_PP | HPTE_R_N | + HPTE_R_C))); + } + native_unlock_hpte(hptep); } - native_unlock_hpte(hptep); - /* Ensure it is out of the tlb too. */ tlbie(vpn, bpsize, apsize, ssize, local); - return ret; } From 0de3b56b137e92afa6a4581a0bb51633159180d6 Mon Sep 17 00:00:00 2001 From: Jiang Lu Date: Fri, 21 Nov 2014 11:23:19 +0800 Subject: [PATCH 093/101] powerpc/oprofile: Disable pagefaults during user stack read A page fault occurred during reading user stack in oprofile backtrace would lead following calltrace: WARNING: at linux/kernel/smp.c:210 Modules linked in: CPU: 5 PID: 736 Comm: sh Tainted: G W 3.14.23-WR7.0.0.0_standard #1 task: c0000000f6208bc0 ti: c00000007c72c000 task.ti: c00000007c72c000 NIP: c0000000000ed6e4 LR: c0000000000ed5b8 CTR: 0000000000000000 REGS: c00000007c72f050 TRAP: 0700 Tainted: G W (3.14.23-WR7.0.0 tandard) MSR: 0000000080021000 CR: 48222482 XER: 00000000 SOFTE: 0 GPR00: c0000000000ed5b8 c00000007c72f2d0 c0000000010aa048 0000000000000005 GPR04: c000000000fdb820 c00000007c72f410 0000000000000001 0000000000000005 GPR08: c0000000010b5768 c000000000f8a048 0000000000000001 0000000000000000 GPR12: 0000000048222482 c00000000fffe580 0000000022222222 0000000010129664 GPR16: 0000000010143cc0 0000000000000000 0000000044444444 0000000000000000 GPR20: c00000007c7221d8 c0000000f638e3c8 000003f15a20120d 0000000000000001 GPR24: 000000005a20120d c00000007c722000 c00000007cdedda8 00003fffef23b160 GPR28: 0000000000000001 c00000007c72f410 c000000000fdb820 0000000000000006 NIP [c0000000000ed6e4] .smp_call_function_single+0x18c/0x248 LR [c0000000000ed5b8] .smp_call_function_single+0x60/0x248 Call Trace: [c00000007c72f2d0] [c0000000000ed5b8] .smp_call_function_single+0x60/0x248 (unreliable) [c00000007c72f3a0] [c000000000030810] .__flush_tlb_page+0x164/0x1b0 [c00000007c72f460] [c00000000002e054] .ptep_set_access_flags+0xb8/0x168 [c00000007c72f500] [c0000000001ad3d8] .handle_mm_fault+0x4a8/0xbac [c00000007c72f5e0] [c000000000bb3238] .do_page_fault+0x3b8/0x868 [c00000007c72f810] [c00000000001e1d0] storage_fault_common+0x20/0x44 Exception: 301 at .__copy_tofrom_user_base+0x54/0x5b0 LR = .op_powerpc_backtrace+0x190/0x20c [c00000007c72fb00] [c000000000a2ec34] .op_powerpc_backtrace+0x204/0x20c (unreliable) [c00000007c72fbc0] [c000000000a2b5fc] .oprofile_add_ext_sample+0xe8/0x118 [c00000007c72fc70] [c000000000a2eee0] .fsl_emb_handle_interrupt+0x20c/0x27c [c00000007c72fd30] [c000000000a2e440] .op_handle_interrupt+0x44/0x58 [c00000007c72fdb0] [c000000000016d68] .performance_monitor_exception+0x74/0x90 [c00000007c72fe30] [c00000000001d8b4] exc_0x260_common+0xfc/0x100 performance_monitor_exception() is executed in a context with interrupt disabled and preemption enabled. When there is a user space page fault happened, do_page_fault() invoke in_atomic() to decide whether kernel should handle such page fault. in_atomic() only check preempt_count. So need call pagefault_disable() to disable preemption before reading user stack. Signed-off-by: Jiang Lu Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/oprofile/backtrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6adf55fa5d88..ecc66d5f02c9 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include @@ -105,6 +105,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) first_frame = 0; } } else { + pagefault_disable(); #ifdef CONFIG_PPC64 if (!is_32bit_task()) { while (depth--) { @@ -113,7 +114,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) break; first_frame = 0; } - + pagefault_enable(); return; } #endif @@ -124,5 +125,6 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) break; first_frame = 0; } + pagefault_enable(); } } From c4f3eb5fc527b749d6fb0d47ffdcfe83706dbe2f Mon Sep 17 00:00:00 2001 From: James Yang Date: Fri, 14 Nov 2014 12:32:24 -0600 Subject: [PATCH 094/101] powerpc/mm/hugetlb: Sanity check gigantic hugepage count Limit the number of gigantic hugepages specified by the hugepages= parameter to MAX_NUMBER_GPAGES. Signed-off-by: James Yang Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hugetlbpage.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index af56de82375d..747e0c616526 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -355,6 +355,13 @@ static int __init do_gpage_early_setup(char *param, char *val, if (size != 0) { if (sscanf(val, "%lu", &npages) <= 0) npages = 0; + if (npages > MAX_NUMBER_GPAGES) { + pr_warn("MMU: %lu pages requested for page " + "size %llu KB, limiting to " + __stringify(MAX_NUMBER_GPAGES) "\n", + npages, size / 1024); + npages = MAX_NUMBER_GPAGES; + } gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; size = 0; } From f1581bf14bc40b4a14bf10358eac0b22173b3313 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 2 Nov 2014 21:15:27 +0530 Subject: [PATCH 095/101] powerpc/mm/thp: Remove code duplication Rename invalidate_old_hpte to flush_hash_hugepage and use that in other places. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/tlbflush.h | 3 +- arch/powerpc/mm/hash_utils_64.c | 52 +++++++++++++++++++++++ arch/powerpc/mm/hugepage-hash64.c | 54 +----------------------- arch/powerpc/mm/pgtable_64.c | 64 ++++------------------------- 4 files changed, 65 insertions(+), 108 deletions(-) diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index cd7c2719d3ef..19550d346fea 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -127,7 +127,8 @@ static inline void arch_leave_lazy_mmu_mode(void) extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, int local); extern void flush_hash_range(unsigned long number, int local); - +extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f01027731e23..6c2076c65d7c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1315,6 +1315,58 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #endif } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + void flush_hash_range(unsigned long number, int local) { if (ppc_md.flush_hash_range) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 5f5e6328c21c..1b3ad46a71b5 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,57 +18,6 @@ #include #include -static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) -{ - int i, max_hpte_count, valid; - unsigned long s_addr; - unsigned char *hpte_slot_array; - unsigned long hidx, shift, vpn, hash, slot; - - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; - - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } -} - - int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, unsigned int psize) @@ -145,7 +94,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * hash page table entries. */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) - invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, + ssize); } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 87ff0c1908a9..c175c990580e 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,29 +739,13 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, i; - unsigned long s_addr; - int max_hpte_count; - unsigned int psize, valid; - unsigned char *hpte_slot_array; - unsigned long hidx, vpn, vsid, hash, shift, slot; - - /* - * Flush all the hptes mapping this hugepage - */ - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; + int ssize; + unsigned int psize; + unsigned long vsid; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM - psize = get_slice_psize(mm, s_addr); + psize = get_slice_psize(mm, addr); BUG_ON(psize == MMU_PAGE_16M); #endif if (old_pmd & _PAGE_COMBO) @@ -769,46 +753,16 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, else psize = MMU_PAGE_64K; - if (!is_kernel_addr(s_addr)) { - ssize = user_segment_size(s_addr); - vsid = get_vsid(mm->context.id, s_addr, ssize); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); WARN_ON(vsid == 0); } else { - vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, - hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) From d557b09800dab5dd6804e5b79324069abcf0be11 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 2 Nov 2014 21:15:28 +0530 Subject: [PATCH 096/101] powerpc/mm/thp: Use tlbiel if possible If we know that user address space has never executed on other cpus we could use tlbiel. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/include/asm/tlbflush.h | 3 ++- arch/powerpc/mm/hash_native_64.c | 4 ++-- arch/powerpc/mm/hash_utils_64.c | 28 ++++++++++++++++++++++----- arch/powerpc/mm/hugepage-hash64.c | 2 +- arch/powerpc/mm/pgtable_64.c | 9 +++++++-- arch/powerpc/platforms/pseries/lpar.c | 2 +- 7 files changed, 37 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 15c9150a58cc..e5c0919acca4 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -60,7 +60,7 @@ struct machdep_calls { void (*hugepage_invalidate)(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize); + int psize, int ssize, int local); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ void (*hpte_clear_all)(void); diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 19550d346fea..4d3ecd8d8929 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -128,7 +128,8 @@ extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, int local); extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize); + pmd_t *pmdp, unsigned int psize, int ssize, + int local); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 558e50bac6f7..13700911b522 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -425,7 +425,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, static void native_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i; struct hash_pte *hptep; @@ -471,7 +471,7 @@ static void native_hugepage_invalidate(unsigned long vsid, * instruction compares entry_VA in tlb with the VA specified * here */ - tlbie(vpn, psize, actual_psize, ssize, 0); + tlbie(vpn, psize, actual_psize, ssize, local); } local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 6c2076c65d7c..68211d398fdb 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1317,7 +1317,7 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) + pmd_t *pmdp, unsigned int psize, int ssize, int local) { int i, max_hpte_count, valid; unsigned long s_addr; @@ -1334,9 +1334,11 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, if (!hpte_slot_array) return; - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); + if (ppc_md.hugepage_invalidate) { + ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); + goto tm_abort; + } /* * No bluk hpte removal support, invalidate each entry */ @@ -1362,8 +1364,24 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); + MMU_PAGE_16M, ssize, local); } +tm_abort: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Transactions are not aborted by tlbiel, only tlbie. + * Without, syncing a page back to a block device w/ PIO could pick up + * transactional data (bad!) so we force an abort here. Before the + * sync the page will be made read-only, which will flush_hash_page. + * BIG ISSUE here: if the kernel uses a page from userspace without + * unmapping it first, it may see the speculated version. + */ + if (local && cpu_has_feature(CPU_FTR_TM) && + current->thread.regs && + MSR_TM_ACTIVE(current->thread.regs->msr)) { + tm_enable(); + tm_abort(TM_CAUSE_TLBI); + } +#endif } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 1b3ad46a71b5..3a648cd363ae 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize); + ssize, local); } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c175c990580e..eea9fa1f8ae7 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize; + int ssize, local = 0; unsigned int psize; unsigned long vsid; + const struct cpumask *tmp; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM @@ -762,7 +763,11 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, ssize = mmu_kernel_ssize; } - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize); + tmp = cpumask_of(smp_processor_id()); + if (cpumask_equal(mm_cpumask(mm), tmp)) + local = 1; + + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index d214a012b026..832f221840f2 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -442,7 +442,7 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i, index = 0; unsigned long s_addr = addr; From 1ad7d70562eeb14df8b6d3e1a0a56f1bdfb990f7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 28 Nov 2014 10:06:42 +0530 Subject: [PATCH 097/101] powerpc/xmon: Enable HW instruction breakpoint on POWER8 This patch enables support for hardware instruction breakpoint in xmon on POWER8 platform with the help of a new register called the CIABR (Completed Instruction Address Breakpoint Register). With this patch, a single hardware instruction breakpoint can be added and cleared during any active xmon debug session. The hardware based instruction breakpoint mechanism works correctly with the existing TRAP based instruction breakpoint available on xmon. There are no powerpc CPU with CPU_FTR_IABR feature any more. This patch has re-purposed all the existing IABR related code to work with CIABR register based HW instruction breakpoint. This has one odd feature, which is that when we hit a breakpoint xmon doesn't tell us we have hit the breakpoint. This is because xmon is expecting bp->address == regs->nip. Because CIABR fires on completition regs->nip points to the instruction after the breakpoint. We could fix that, but it would then confuse other parts of the xmon code which think we need to emulate the instruction. [mpe] Signed-off-by: Michael Ellerman Signed-off-by: Anshuman Khandual --- arch/powerpc/xmon/xmon.c | 58 +++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index ef18021d52e1..820dc135f040 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -51,6 +51,12 @@ #include #endif +#if defined(CONFIG_PPC_SPLPAR) +#include +#else +static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; }; +#endif + #include "nonstdio.h" #include "dis-asm.h" @@ -270,6 +276,45 @@ static inline void cinval(void *p) asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p)); } +/** + * write_ciabr() - write the CIABR SPR + * @ciabr: The value to write. + * + * This function writes a value to the CIARB register either directly + * through mtspr instruction if the kernel is in HV privilege mode or + * call a hypervisor function to achieve the same in case the kernel + * is in supervisor privilege mode. + */ +static void write_ciabr(unsigned long ciabr) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + mtspr(SPRN_CIABR, ciabr); + return; + } + plapr_set_ciabr(ciabr); +} + +/** + * set_ciabr() - set the CIABR + * @addr: The value to set. + * + * This function sets the correct privilege value into the the HW + * breakpoint address before writing it up in the CIABR register. + */ +static void set_ciabr(unsigned long addr) +{ + addr &= ~CIABR_PRIV; + + if (cpu_has_feature(CPU_FTR_HVMODE)) + addr |= CIABR_PRIV_HYPER; + else + addr |= CIABR_PRIV_SUPER; + write_ciabr(addr); +} + /* * Disable surveillance (the service processor watchdog function) * while we are in xmon. @@ -764,9 +809,9 @@ static void insert_cpu_bpts(void) brk.len = 8; __set_breakpoint(&brk); } - if (iabr && cpu_has_feature(CPU_FTR_IABR)) - mtspr(SPRN_IABR, iabr->address - | (iabr->enabled & (BP_IABR|BP_IABR_TE))); + + if (iabr) + set_ciabr(iabr->address); } static void remove_bpts(void) @@ -792,8 +837,7 @@ static void remove_bpts(void) static void remove_cpu_bpts(void) { hw_breakpoint_disable(); - if (cpu_has_feature(CPU_FTR_IABR)) - mtspr(SPRN_IABR, 0); + write_ciabr(0); } /* Command interpreting routine */ @@ -1128,7 +1172,7 @@ static char *breakpoint_help_string = "b [cnt] set breakpoint at given instr addr\n" "bc clear all breakpoints\n" "bc clear breakpoint number n or at addr\n" - "bi [cnt] set hardware instr breakpoint (POWER3/RS64 only)\n" + "bi [cnt] set hardware instr breakpoint (POWER8 only)\n" "bd [cnt] set hardware data breakpoint\n" ""; @@ -1167,7 +1211,7 @@ bpt_cmds(void) break; case 'i': /* bi - hardware instr breakpoint */ - if (!cpu_has_feature(CPU_FTR_IABR)) { + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { printf("Hardware instruction breakpoint " "not supported on this cpu\n"); break; From abb90ee7bca5af977b90a0c6be44f631fdacc932 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 1 Dec 2014 16:54:13 +1100 Subject: [PATCH 098/101] powerpc/xmon: Cleanup the breakpoint flags Drop BP_IABR_TE, which though used, does not do anything useful. Rename BP_IABR to BP_CIABR. Renumber the flags. Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 820dc135f040..dfe337238699 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -94,10 +94,9 @@ struct bpt { }; /* Bits in bpt.enabled */ -#define BP_IABR_TE 1 /* IABR translation enabled */ -#define BP_IABR 2 -#define BP_TRAP 8 -#define BP_DABR 0x10 +#define BP_CIABR 1 +#define BP_TRAP 2 +#define BP_DABR 4 #define NBPTS 256 static struct bpt bpts[NBPTS]; @@ -772,7 +771,7 @@ static void insert_bpts(void) bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { - if ((bp->enabled & (BP_TRAP|BP_IABR)) == 0) + if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0) continue; if (mread(bp->address, &bp->instr[0], 4) != 4) { printf("Couldn't read instruction at %lx, " @@ -787,7 +786,7 @@ static void insert_bpts(void) continue; } store_inst(&bp->instr[0]); - if (bp->enabled & BP_IABR) + if (bp->enabled & BP_CIABR) continue; if (mwrite(bp->address, &bpinstr, 4) != 4) { printf("Couldn't write instruction at %lx, " @@ -822,7 +821,7 @@ static void remove_bpts(void) bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { - if ((bp->enabled & (BP_TRAP|BP_IABR)) != BP_TRAP) + if ((bp->enabled & (BP_TRAP|BP_CIABR)) != BP_TRAP) continue; if (mread(bp->address, &instr, 4) == 4 && instr == bpinstr @@ -1217,7 +1216,7 @@ bpt_cmds(void) break; } if (iabr) { - iabr->enabled &= ~(BP_IABR | BP_IABR_TE); + iabr->enabled &= ~BP_CIABR; iabr = NULL; } if (!scanhex(&a)) @@ -1226,7 +1225,7 @@ bpt_cmds(void) break; bp = new_breakpoint(a); if (bp != NULL) { - bp->enabled |= BP_IABR | BP_IABR_TE; + bp->enabled |= BP_CIABR; iabr = bp; } break; @@ -1283,7 +1282,7 @@ bpt_cmds(void) if (!bp->enabled) continue; printf("%2x %s ", BP_NUM(bp), - (bp->enabled & BP_IABR)? "inst": "trap"); + (bp->enabled & BP_CIABR) ? "inst": "trap"); xmon_print_symbol(bp->address, " ", "\n"); } break; From aefa5688c070727b8729de1aef85cad7b9933fc7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 4 Dec 2014 11:00:14 +0530 Subject: [PATCH 099/101] powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault upatepp can get called for a nohpte fault when we find from the linux page table that the translation was hashed before. In that case we are sure that there is no existing translation, hence we could avoid doing tlbie. We could possibly race with a parallel fault filling the TLB. But that should be ok because updatepp is only ever relaxing permissions. We also look at linux pte permission bits when filling hash pte permission bits. We also hold the linux pte busy bits while inserting/updating a hashpte entry, hence a paralle update of linux pte is not possible. On the other hand mprotect involves ptep_modify_prot_start which cause a hpte invalidate and not updatepp. Performance number: We use randbox_access_bench written by Anton. Kernel with THP disabled and smaller hash page table size. 86.60% random_access_b [kernel.kallsyms] [k] .native_hpte_updatepp 2.10% random_access_b random_access_bench [.] doit 1.99% random_access_b [kernel.kallsyms] [k] .do_raw_spin_lock 1.85% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 1.26% random_access_b [kernel.kallsyms] [k] .native_flush_hash_range 1.18% random_access_b [kernel.kallsyms] [k] .__delay 0.69% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 0.37% random_access_b [kernel.kallsyms] [k] .clear_user_page 0.34% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 0.32% random_access_b [kernel.kallsyms] [k] fast_exception_return 0.30% random_access_b [kernel.kallsyms] [k] .hash_page_mm With Fix: 27.54% random_access_b random_access_bench [.] doit 22.90% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 5.76% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 5.20% random_access_b [kernel.kallsyms] [k] fast_exception_return 5.12% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 4.80% random_access_b [kernel.kallsyms] [k] .hash_page_mm 3.31% random_access_b [kernel.kallsyms] [k] data_access_common 1.84% random_access_b [kernel.kallsyms] [k] .trace_hardirqs_on_caller Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/include/asm/mmu-hash64.h | 22 +++++++----- arch/powerpc/include/asm/tlbflush.h | 4 +-- arch/powerpc/kernel/exceptions-64s.S | 2 ++ arch/powerpc/mm/hash_low_64.S | 15 ++++---- arch/powerpc/mm/hash_native_64.c | 15 +++++--- arch/powerpc/mm/hash_utils_64.c | 44 ++++++++++++++--------- arch/powerpc/mm/hugepage-hash64.c | 8 ++--- arch/powerpc/mm/hugetlbpage-hash64.c | 6 ++-- arch/powerpc/mm/pgtable_64.c | 7 ++-- arch/powerpc/platforms/cell/beat_htab.c | 4 +-- arch/powerpc/platforms/cell/spu_base.c | 5 +-- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- drivers/misc/cxl/fault.c | 8 +++-- 16 files changed, 91 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e5c0919acca4..c8175a3fe560 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -42,7 +42,7 @@ struct machdep_calls { unsigned long newpp, unsigned long vpn, int bpsize, int apsize, - int ssize, int local); + int ssize, unsigned long flags); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea, int psize, int ssize); diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index aeebc94b2bce..4f13c3ed7acf 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -316,27 +316,33 @@ static inline unsigned long hpt_hash(unsigned long vpn, return hash & 0x7fffffffffUL; } +#define HPTE_LOCAL_UPDATE 0x1 +#define HPTE_NOHPTE_UPDATE 0x2 + extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize, int subpage_prot); + unsigned long flags, int ssize, int subpage_prot); extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize); + unsigned long flags, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); -extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags); +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize); + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, - int local, int ssize, unsigned int psize); + unsigned long flags, int ssize, unsigned int psize); #else static inline int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, - unsigned long trap, int local, + unsigned long trap, unsigned long flags, int ssize, unsigned int psize) { BUG(); diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 4d3ecd8d8929..23d351ca0303 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -125,11 +125,11 @@ static inline void arch_leave_lazy_mmu_mode(void) extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, - int ssize, int local); + int ssize, unsigned long flags); extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, - int local); + unsigned long flags); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ad62f4d6ce31..6213f494f40b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1565,9 +1565,11 @@ do_hash_page: * r3 contains the faulting address * r4 contains the required access permissions * r5 contains the trap number + * r6 contains dsisr * * at return r3 = 0 for success, 1 for page fault, negative for error */ + ld r6,_DSISR(r1) bl hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 5094f32b706e..463174a4a647 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -46,7 +46,8 @@ /* * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize) + * pte_t *ptep, unsigned long trap, unsigned long flags, + * int ssize) * * Adds a 4K page to the hash table in a segment of 4K pages only */ @@ -298,7 +299,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* Patched by htab_finish_init() */ @@ -338,8 +339,8 @@ htab_pte_insert_failure: *****************************************************************************/ /* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize, - * int subpg_prot) + * pte_t *ptep, unsigned long trap, unsigned local flags, + * int ssize, int subpg_prot) */ /* @@ -594,7 +595,7 @@ htab_inval_old_hpte: li r5,0 /* PTE.hidx */ li r6,MMU_PAGE_64K /* psize */ ld r7,STK_PARAM(R9)(r1) /* ssize */ - ld r8,STK_PARAM(R8)(r1) /* local */ + ld r8,STK_PARAM(R8)(r1) /* flags */ bl flush_hash_page /* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */ lis r0,_PAGE_HPTE_SUB@h @@ -666,7 +667,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ @@ -962,7 +963,7 @@ ht64_modify_pte: li r6,MMU_PAGE_64K /* base page size */ li r7,MMU_PAGE_64K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl ht64_call_hpte_updatepp ht64_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 13700911b522..9c4880ddecd6 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -283,11 +283,11 @@ static long native_hpte_remove(unsigned long hpte_group) static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int bpsize, - int apsize, int ssize, int local) + int apsize, int ssize, unsigned long flags) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; - int ret = 0; + int ret = 0, local = 0; want_v = hpte_encode_avpn(vpn, bpsize, ssize); @@ -322,8 +322,15 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, } native_unlock_hpte(hptep); } - /* Ensure it is out of the tlb too. */ - tlbie(vpn, bpsize, apsize, ssize, local); + + if (flags & HPTE_LOCAL_UPDATE) + local = 1; + /* + * Ensure it is out of the tlb too if it is not a nohpte fault + */ + if (!(flags & HPTE_NOHPTE_UPDATE)) + tlbie(vpn, bpsize, apsize, ssize, local); + return ret; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 68211d398fdb..e56a307bc676 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; @@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; - int rc, user_region = 0, local = 0; + int rc, user_region = 0; int psize, ssize; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", @@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; #ifndef CONFIG_PPC_64K_PAGES /* If we use 4K pages and our psize is not 4K, then we might @@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u if (hugeshift) { if (pmd_trans_huge(*(pmd_t *)ptep)) rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, - trap, local, ssize, psize); + trap, flags, ssize, psize); #ifdef CONFIG_HUGETLB_PAGE else rc = __hash_page_huge(ea, access, vsid, ptep, trap, - local, ssize, hugeshift, psize); + flags, ssize, hugeshift, psize); #else else { /* @@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u #ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ { @@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u rc = -2; else rc = __hash_page_4K(ea, access, vsid, ptep, trap, - local, ssize, spp); + flags, ssize, spp); } /* Dump some info in case of hash insertion failure, they should @@ -1181,14 +1184,19 @@ bail: } EXPORT_SYMBOL_GPL(hash_page_mm); -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr) { + unsigned long flags = 0; struct mm_struct *mm = current->mm; if (REGION_ID(ea) == VMALLOC_REGION_ID) mm = &init_mm; - return hash_page_mm(mm, ea, access, trap); + if (dsisr & DSISR_NOHPTE) + flags |= HPTE_NOHPTE_UPDATE; + + return hash_page_mm(mm, ea, access, trap, flags); } EXPORT_SYMBOL_GPL(hash_page); @@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, pgd_t *pgdir; pte_t *ptep; unsigned long flags; - int rc, ssize, local = 0; + int rc, ssize, update_flags = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1251,16 +1259,17 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Is that local to this CPU ? */ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - local = 1; + update_flags |= HPTE_LOCAL_UPDATE; /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + update_flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(mm, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags, + ssize, subpage_protection(mm, ea)); /* Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do @@ -1278,9 +1287,10 @@ out_exit: * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, - int local) + unsigned long flags) { unsigned long hash, index, shift, hidx, slot; + int local = flags & HPTE_LOCAL_UPDATE; DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { @@ -1317,12 +1327,14 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize, int local) + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags) { int i, max_hpte_count, valid; unsigned long s_addr; unsigned char *hpte_slot_array; unsigned long hidx, shift, vpn, hash, slot; + int local = flags & HPTE_LOCAL_UPDATE; s_addr = addr & HPAGE_PMD_MASK; hpte_slot_array = get_hpte_slot_array(pmdp); diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 3a648cd363ae..86686514ae13 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -19,8 +19,8 @@ #include int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, - pmd_t *pmdp, unsigned long trap, int local, int ssize, - unsigned int psize) + pmd_t *pmdp, unsigned long trap, unsigned long flags, + int ssize, unsigned int psize) { unsigned int index, valid; unsigned char *hpte_slot_array; @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize, local); + ssize, flags); } valid = hpte_valid(hpte_slot_array, index); @@ -108,7 +108,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot += hidx & _PTEIDX_GROUP_IX; ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, local); + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. */ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index a5bcf9301196..d94b1af53a93 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -19,8 +19,8 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long vflags, int psize, int ssize); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize) + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize) { unsigned long vpn; unsigned long old_pte, new_pte; @@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, local) == -1) + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index eea9fa1f8ae7..4fe5f64cc179 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, local = 0; + int ssize; unsigned int psize; unsigned long vsid; + unsigned long flags = 0; const struct cpumask *tmp; /* get the base page size,vsid and segment size */ @@ -765,9 +766,9 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, tmp = cpumask_of(smp_processor_id()); if (cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index d4d245c0d787..bee9232fe619 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -186,7 +186,7 @@ static long beat_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; u64 dummy0, dummy1; @@ -369,7 +369,7 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; unsigned long want_v; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index ffcbd242e669..f7af74f83693 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -181,7 +181,8 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) return 0; } -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX +extern int hash_page(unsigned long ea, unsigned long access, + unsigned long trap, unsigned long dsisr); //XXX static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) { int ret; @@ -196,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT, 0x300); + ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e45894a08118..d98f845ac777 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -144,7 +144,7 @@ int spufs_handle_class1(struct spu_context *ctx) access = (_PAGE_PRESENT | _PAGE_USER); access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; local_irq_save(flags); - ret = hash_page(ea, access, 0x300); + ret = hash_page(ea, access, 0x300, dsisr); local_irq_restore(flags); /* hashing failed, so try the actual fault handler */ diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 3e270e3412ae..2f95d33cf34a 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -110,7 +110,7 @@ static long ps3_hpte_remove(unsigned long hpte_group) static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { int result; u64 hpte_v, want_v, hpte_rs; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 832f221840f2..469751d92004 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -284,7 +284,7 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index c99e896604ee..f8684bca2d79 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -133,7 +133,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, { unsigned flt = 0; int result; - unsigned long access, flags; + unsigned long access, flags, inv_flags = 0; if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { pr_devel("copro_handle_mm_fault failed: %#x\n", result); @@ -149,8 +149,12 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, access |= _PAGE_RW; if ((!ctx->kernel) || ~(dar & (1ULL << 63))) access |= _PAGE_USER; + + if (dsisr & DSISR_NOHPTE) + inv_flags |= HPTE_NOHPTE_UPDATE; + local_irq_save(flags); - hash_page_mm(mm, dar, access, 0x300); + hash_page_mm(mm, dar, access, 0x300, inv_flags); local_irq_restore(flags); pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); From 682e77c861c4c60f79ffbeae5e1938ffed24a575 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 5 Dec 2014 10:01:15 +0530 Subject: [PATCH 100/101] powerpc/book3s: Fix partial invalidation of TLBs in MCE code. The existing MCE code calls flush_tlb hook with IS=0 (single page) resulting in partial invalidation of TLBs which is not right. This patch fixes that by passing IS=0xc00 to invalidate whole TLB for successful recovery from TLB and ERAT errors. Cc: stable@vger.kernel.org Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce_power.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index aa9aff3d6ad3..b6f123ab90ed 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -79,7 +79,7 @@ static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) } if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET); /* reset error bits */ dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; } @@ -110,7 +110,7 @@ static long mce_handle_common_ierror(uint64_t srr1) break; case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET); handled = 1; } break; From 56548fc0e86cb9156af7a7e1f15ba78f251dafaf Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 3 Dec 2014 14:48:40 +1100 Subject: [PATCH 101/101] powerpc/powernv: Return to cpu offline loop when finished in KVM guest When a secondary hardware thread has finished running a KVM guest, we currently put that thread into nap mode using a nap instruction in the KVM code. This changes the code so that instead of doing a nap instruction directly, we instead cause the call to power7_nap() that put the thread into nap mode to return. The reason for doing this is to avoid having the KVM code having to know what low-power mode to put the thread into. In the case of a secondary thread used to run a KVM guest, the thread will be offline from the point of view of the host kernel, and the relevant power7_nap() call is the one in pnv_smp_cpu_disable(). In this case we don't want to clear pending IPIs in the offline loop in that function, since that might cause us to miss the wakeup for the next time the thread needs to run a guest. To tell whether or not to clear the interrupt, we use the SRR1 value returned from power7_nap(), and check if it indicates an external interrupt. We arrange that the return from power7_nap() when we have finished running a guest returns 0, so pending interrupts don't get flushed in that case. Note that it is important a secondary thread that has finished executing in the guest, or that didn't have a guest to run, should not return to power7_nap's caller while the kvm_hstate.hwthread_req flag in the PACA is non-zero, because the return from power7_nap will reenable the MMU, and the MMU might still be in guest context. In this situation we spin at low priority in real mode waiting for hwthread_req to become zero. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 2 +- arch/powerpc/kernel/exceptions-64s.S | 2 + arch/powerpc/kernel/idle_power7.S | 12 +++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 54 +++++++++++++++++-------- arch/powerpc/platforms/powernv/smp.c | 23 ++++++++--- 5 files changed, 68 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index dda7ac4c80bd..29c3798cf800 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -451,7 +451,7 @@ extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -extern void power7_nap(int check_irq); +extern unsigned long power7_nap(int check_irq); extern void power7_sleep(void); extern void flush_instruction_cache(void); extern void hard_reset_now(void); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6213f494f40b..db08382e19f1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -131,6 +131,8 @@ BEGIN_FTR_SECTION 1: #endif + /* Return SRR1 from power7_nap() */ + mfspr r3,SPRN_SRR1 beq cr1,2f b power7_wakeup_noloss 2: b power7_wakeup_loss diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index c0754bbf8118..18c0687e5ab3 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -212,6 +212,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) mtspr SPRN_SRR0,r5 rfid +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ _GLOBAL(power7_wakeup_loss) ld r1,PACAR1(r13) BEGIN_FTR_SECTION @@ -219,15 +223,19 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_NVGPRS(r1) REST_GPR(2, r1) - ld r3,_CCR(r1) + ld r6,_CCR(r1) ld r4,_MSR(r1) ld r5,_NIP(r1) addi r1,r1,INT_FRAME_SIZE - mtcr r3 + mtcr r6 mtspr SPRN_SRR1,r4 mtspr SPRN_SRR0,r5 rfid +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ _GLOBAL(power7_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index edb2ccdbb2ba..65c105b17a25 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -201,8 +201,6 @@ kvmppc_primary_no_guest: bge kvm_novcpu_exit /* another thread already exiting */ li r3, NAPPING_NOVCPU stb r3, HSTATE_NAPPING(r13) - li r3, 1 - stb r3, HSTATE_HWTHREAD_REQ(r13) b kvm_do_nap @@ -293,6 +291,8 @@ kvm_start_guest: /* if we have no vcpu to run, go back to sleep */ beq kvm_no_guest +kvm_secondary_got_guest: + /* Set HSTATE_DSCR(r13) to something sensible */ ld r6, PACA_DSCR(r13) std r6, HSTATE_DSCR(r13) @@ -318,27 +318,46 @@ kvm_start_guest: stwcx. r3, 0, r4 bne 51b +/* + * At this point we have finished executing in the guest. + * We need to wait for hwthread_req to become zero, since + * we may not turn on the MMU while hwthread_req is non-zero. + * While waiting we also need to check if we get given a vcpu to run. + */ kvm_no_guest: - li r0, KVM_HWTHREAD_IN_NAP + lbz r3, HSTATE_HWTHREAD_REQ(r13) + cmpwi r3, 0 + bne 53f + HMT_MEDIUM + li r0, KVM_HWTHREAD_IN_KERNEL stb r0, HSTATE_HWTHREAD_STATE(r13) -kvm_do_nap: - /* Clear the runlatch bit before napping */ - mfspr r2, SPRN_CTRLF - clrrdi r2, r2, 1 - mtspr SPRN_CTRLT, r2 - + /* need to recheck hwthread_req after a barrier, to avoid race */ + sync + lbz r3, HSTATE_HWTHREAD_REQ(r13) + cmpwi r3, 0 + bne 54f +/* + * We jump to power7_wakeup_loss, which will return to the caller + * of power7_nap in the powernv cpu offline loop. The value we + * put in r3 becomes the return value for power7_nap. + */ li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - isync - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b - nap - b . + li r3, 0 + b power7_wakeup_loss + +53: HMT_LOW + ld r4, HSTATE_KVM_VCPU(r13) + cmpdi r4, 0 + beq kvm_no_guest + HMT_MEDIUM + b kvm_secondary_got_guest + +54: li r0, KVM_HWTHREAD_IN_KVM + stb r0, HSTATE_HWTHREAD_STATE(r13) + b kvm_no_guest /****************************************************************************** * * @@ -2172,6 +2191,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the * runlatch bit before napping. */ +kvm_do_nap: mfspr r2, SPRN_CTRLF clrrdi r2, r2, 1 mtspr SPRN_CTRLT, r2 diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 4753958cd509..b716f666e48a 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -149,6 +149,7 @@ static int pnv_smp_cpu_disable(void) static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; + unsigned long srr1; /* Standard hot unplug procedure */ local_irq_disable(); @@ -165,13 +166,25 @@ static void pnv_smp_cpu_kill_self(void) mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { ppc64_runlatch_off(); - power7_nap(1); + srr1 = power7_nap(1); ppc64_runlatch_on(); - /* Clear the IPI that woke us up */ - icp_native_flush_interrupt(); - local_paca->irq_happened &= PACA_IRQ_HARD_DIS; - mb(); + /* + * If the SRR1 value indicates that we woke up due to + * an external interrupt, then clear the interrupt. + * We clear the interrupt before checking for the + * reason, so as to avoid a race where we wake up for + * some other reason, find nothing and clear the interrupt + * just as some other cpu is sending us an interrupt. + * If we returned from power7_nap as a result of + * having finished executing in a KVM guest, then srr1 + * contains 0. + */ + if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) { + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; + smp_mb(); + } if (cpu_core_split_required()) continue;