From 69111bac42f5ceacdd22e30947837ceb2c4493ed Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 21 Oct 2014 15:23:25 -0500 Subject: powerpc: Replace __get_cpu_var uses This still has not been merged and now powerpc is the only arch that does not have this change. Sorry about missing linuxppc-dev before. V2->V2 - Fix up to work against 3.18-rc1 __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: Benjamin Herrenschmidt CC: Paul Mackerras Signed-off-by: Christoph Lameter [mpe: Fix build errors caused by set/or_softirq_pending(), and rework assignment in __set_breakpoint() to use memcpy().] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 1bbb301..8add8b8 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -21,7 +21,12 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT -#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending +#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x)) static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 2def01ed..cd7c271 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -107,14 +107,14 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->index) __flush_tlb_pending(batch); diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0d050ea..6997f4a 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -98,7 +98,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr); static inline void xics_push_cppr(unsigned int vec) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) return; @@ -111,7 +111,7 @@ static inline void xics_push_cppr(unsigned int vec) static inline unsigned char xics_pop_cppr(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index < 1)) return LOWEST_PRIORITY; @@ -121,7 +121,7 @@ static inline unsigned char xics_pop_cppr(void) static inline void xics_set_base_cppr(unsigned char cppr) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* we only really want to set the priority when there's * just one cppr value on the stack @@ -133,7 +133,7 @@ static inline void xics_set_base_cppr(unsigned char cppr) static inline unsigned char xics_cppr_top(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); return os_cppr->stack[os_cppr->index]; } diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index d55c76c..f421781 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -41,7 +41,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); - __get_cpu_var(irq_stat).doorbell_irqs++; + __this_cpu_inc(irq_stat.doorbell_irqs); smp_ipi_demux(); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1f7d84e..05e804c 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -63,7 +63,7 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); *slot = bp; @@ -88,7 +88,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); if (*slot != bp) { WARN_ONCE(1, "Can't find the breakpoint"); @@ -226,7 +226,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __get_cpu_var(bp_per_reg); + bp = __this_cpu_read(bp_per_reg); if (!bp) goto out; info = counter_arch_bp(bp); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a10642a..71e60bf 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev, * We don't need to disable preemption here because any CPU can * safely use any IOMMU pool. */ - pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); + pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); if (largealloc) pool = &(tbl->large_pool); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c143835..8d87bb1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -114,7 +114,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); return now >= *next_tb; } @@ -499,7 +499,7 @@ void __do_irq(struct pt_regs *regs) /* And finally process it */ if (unlikely(irq == NO_IRQ)) - __get_cpu_var(irq_stat).spurious_irqs++; + __this_cpu_inc(irq_stat.spurious_irqs); else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 85046573..e77c3cc 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -155,7 +155,7 @@ static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; struct thread_info *backup_current_thread_info = - &__get_cpu_var(kgdb_thread_info); + this_cpu_ptr(&kgdb_thread_info); if (user_mode(regs)) return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2f72af8..7c053f2 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -119,7 +119,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } @@ -127,7 +127,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_msr = regs->msr; } @@ -192,7 +192,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a7fd4cb..15c99b6 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,8 +73,8 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __get_cpu_var(mce_nest_count)++; - struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); + int index = __this_cpu_inc_return(mce_nest_count); + struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* * Return if we don't have enough space to log mce event. @@ -143,7 +143,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __get_cpu_var(mce_nest_count) - 1; + int index = __this_cpu_read(mce_nest_count) - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -153,7 +153,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. */ if (index < MAX_MC_EVT) { - mc_evt = &__get_cpu_var(mce_event[index]); + mc_evt = this_cpu_ptr(&mce_event[index]); /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -163,7 +163,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. */ if (release) - __get_cpu_var(mce_nest_count)--; + __this_cpu_dec(mce_nest_count); return ret; } @@ -184,13 +184,13 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __get_cpu_var(mce_queue_count)++; + index = __this_cpu_inc_return(mce_queue_count); /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __get_cpu_var(mce_queue_count)--; + __this_cpu_dec(mce_queue_count); return; } - __get_cpu_var(mce_event_queue[index]) = evt; + memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -208,11 +208,11 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. */ - while (__get_cpu_var(mce_queue_count) > 0) { - index = __get_cpu_var(mce_queue_count) - 1; + while (__this_cpu_read(mce_queue_count) > 0) { + index = __this_cpu_read(mce_queue_count) - 1; machine_check_print_event_info( - &__get_cpu_var(mce_event_queue[index])); - __get_cpu_var(mce_queue_count)--; + this_cpu_ptr(&mce_event_queue[index])); + __this_cpu_dec(mce_queue_count); } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 923cd2d..91e132b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -499,7 +499,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) void __set_breakpoint(struct arch_hw_breakpoint *brk) { - __get_cpu_var(current_brk) = *brk; + memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); if (cpu_has_feature(CPU_FTR_DAWR)) set_dawr(brk); @@ -842,7 +842,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) + if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) __set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -856,7 +856,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Collect processor utilization data per process */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; cu->current_tb = current_tb = mfspr(SPRN_PURR); @@ -866,7 +866,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_BOOK3S_64 - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; if (batch->index) @@ -889,7 +889,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 71e186d..8b2d2dc 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -243,7 +243,7 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = &__get_cpu_var(ipi_message); + struct cpu_messages *info = this_cpu_ptr(&ipi_message); unsigned int all; mb(); /* order any irq clear */ @@ -442,9 +442,9 @@ void generic_mach_cpu_die(void) idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); - __get_cpu_var(cpu_state) = CPU_DEAD; + __this_cpu_write(cpu_state, CPU_DEAD); smp_wmb(); - while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE) cpu_relax(); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 67fd2fd..fa1fd8a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -394,10 +394,10 @@ void ppc_enable_pmcs(void) ppc_set_pmu_inuse(1); /* Only need to enable them once */ - if (__get_cpu_var(pmcs_enabled)) + if (__this_cpu_read(pmcs_enabled)) return; - __get_cpu_var(pmcs_enabled) = 1; + __this_cpu_write(pmcs_enabled, 1); if (ppc_md.enable_pmcs) ppc_md.enable_pmcs(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7505599..9f8ea61 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,9 +458,9 @@ static inline void clear_irq_work_pending(void) DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) #endif /* 32 vs 64 bit */ @@ -482,8 +482,8 @@ void arch_irq_work_raise(void) static void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 now; trace_timer_interrupt_entry(regs); @@ -498,7 +498,7 @@ static void __timer_interrupt(void) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); - __get_cpu_var(irq_stat).timer_irqs_event++; + __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) @@ -506,13 +506,13 @@ static void __timer_interrupt(void) /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); - __get_cpu_var(irq_stat).timer_irqs_others++; + __this_cpu_inc(irq_stat.timer_irqs_others); } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } #endif @@ -527,7 +527,7 @@ static void __timer_interrupt(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -813,7 +813,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; + __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); set_dec(evt); /* We may have raced with new irq work */ @@ -833,7 +833,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, /* Interrupt handler for the timer broadcast IPI */ void tick_broadcast_ipi_handler(void) { - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); *next_tb = get_tb_or_rtc(); __timer_interrupt(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0dc43f9..e6595b7 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,7 +295,7 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); @@ -304,7 +304,7 @@ long machine_check_early(struct pt_regs *regs) long hmi_exception_realmode(struct pt_regs *regs) { - __get_cpu_var(irq_stat).hmi_exceptions++; + __this_cpu_inc(irq_stat.hmi_exceptions); if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); @@ -700,7 +700,7 @@ void machine_check_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); int recover = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU @@ -1519,7 +1519,7 @@ void vsx_unavailable_tm(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { - __get_cpu_var(irq_stat).pmu_irqs++; + __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); } diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 2e02ed8..1609584 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -76,11 +76,11 @@ static inline int local_sid_setup_one(struct id *entry) unsigned long sid; int ret = -1; - sid = ++(__get_cpu_var(pcpu_last_used_sid)); + sid = __this_cpu_inc_return(pcpu_last_used_sid); if (sid < NUM_TIDS) { - __get_cpu_var(pcpu_sids).entry[sid] = entry; + __this_cpu_write(pcpu_sids)entry[sid], entry); entry->val = sid; - entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); ret = sid; } @@ -108,8 +108,8 @@ static inline int local_sid_setup_one(struct id *entry) static inline int local_sid_lookup(struct id *entry) { if (entry && entry->val != 0 && - __get_cpu_var(pcpu_sids).entry[entry->val] == entry && - entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + __this_cpu_read(pcpu_sids.entry[entry->val]) == entry && + entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val])) return entry->val; return -1; } @@ -117,8 +117,8 @@ static inline int local_sid_lookup(struct id *entry) /* Invalidate all id mappings on local core -- call with preempt disabled */ static inline void local_sid_destroy_all(void) { - __get_cpu_var(pcpu_last_used_sid) = 0; - memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); + __this_cpu_write(pcpu_last_used_sid, 0); + memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(pcpu_sids)); } static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 2fdc872..cda695d 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -144,9 +144,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) { + __this_cpu_read(last_vcpu_of_lpid[get_lpid(vcpu)]) != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu; + __this_cpu_write(last_vcpu_of_lpid[get_lpid(vcpu)], vcpu); } } diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ae4962a..d53288a 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -629,7 +629,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index d5339a3..f010277 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1322,7 +1322,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - &__get_cpu_var(ppc64_tlb_batch); + this_cpu_ptr(&ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 5e4ee25..ba47aaf 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = __get_cpu_var(next_tlbcam_idx); + index = this_cpu_read(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + __this_cpu_write(next_tlbcam_idx, tlbcam_index); else - __get_cpu_var(next_tlbcam_idx)++; + __this_cpu_inc(next_tlbcam_idx); return index; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae9..8aa04f0 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -462,7 +462,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = &get_cpu_var(hugepd_freelist_cur); + batchp = this_cpu_ptr(&hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a6995d4..7c4f669 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -339,7 +339,7 @@ static void power_pmu_bhrb_reset(void) static void power_pmu_bhrb_enable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -354,7 +354,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) static void power_pmu_bhrb_disable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -1144,7 +1144,7 @@ static void power_pmu_disable(struct pmu *pmu) if (!ppmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { /* @@ -1211,7 +1211,7 @@ static void power_pmu_enable(struct pmu *pmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -1403,7 +1403,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * Add the event to the list (if there is room) * and check whether the total set is still feasible. */ - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n0 = cpuhw->n_events; if (n0 >= ppmu->n_counter) goto out; @@ -1469,7 +1469,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) power_pmu_read(event); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); for (i = 0; i < cpuhw->n_events; ++i) { if (event == cpuhw->event[i]) { while (++i < cpuhw->n_events) { @@ -1575,7 +1575,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) */ static void power_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1589,7 +1589,7 @@ static void power_pmu_start_txn(struct pmu *pmu) */ static void power_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1607,7 +1607,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1964,7 +1964,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(cpuhw); data.br_stack = &cpuhw->bhrb_stack; } @@ -2037,7 +2037,7 @@ static bool pmc_overflow(unsigned long val) static void perf_event_interrupt(struct pt_regs *regs) { int i, j; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val[8]; int found, active; diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index d35ae52..4acaea0 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -210,7 +210,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { cpuhw->disabled = 1; @@ -249,7 +249,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -653,7 +653,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, static void perf_event_interrupt(struct pt_regs *regs) { int i; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val; int found = 0; diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 8a106b4..4c11421 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -82,7 +82,7 @@ static void iic_unmask(struct irq_data *d) static void iic_eoi(struct irq_data *d) { - struct iic *iic = &__get_cpu_var(cpu_iic); + struct iic *iic = this_cpu_ptr(&cpu_iic); out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); BUG_ON(iic->eoi_ptr < 0); } @@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void) struct iic *iic; unsigned int virq; - iic = &__get_cpu_var(cpu_iic); + iic = this_cpu_ptr(&cpu_iic); *(unsigned long *) &pending = in_be64((u64 __iomem *) &iic->regs->pending_destr); if (!(pending.flags & CBE_IIC_IRQ_VALID)) @@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void) void iic_setup_cpu(void) { - out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff); + out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff); } u8 iic_get_target_id(int cpu) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index ae14c40..e11273b 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; @@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 5f3b232..a6c42f3 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -711,7 +711,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) static unsigned int ps3_get_irq(void) { - struct ps3_private *pd = &__get_cpu_var(ps3_private); + struct ps3_private *pd = this_cpu_ptr(&ps3_private); u64 x = (pd->bmp.status & pd->bmp.mask); unsigned int plug; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 1062f71..39049e4 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -75,7 +75,7 @@ static atomic_t dtl_count; */ static void consume_dtle(struct dtl_entry *dtle, u64 index) { - struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); + struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); struct dtl_entry *wp = dtlr->write_ptr; struct lppaca *vpa = local_paca->lppaca_ptr; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 4575f0c..f02ec3a 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -110,7 +110,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->tb_start = mftb(); h->purr_start = mfspr(SPRN_PURR); } @@ -123,7 +123,7 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->num_calls++; h->tb_total += mftb() - h->tb_start; h->purr_total += mfspr(SPRN_PURR) - h->purr_start; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e32e009..49bcf7c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -199,7 +199,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, local_irq_save(flags); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -212,7 +212,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } rpn = __pa(uaddr) >> TCE_SHIFT; @@ -398,7 +398,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, long l, limit; local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); @@ -406,7 +406,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, local_irq_enable(); return -ENOMEM; } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 8c509d5..87d8792 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -505,7 +505,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long hash, index, shift, hidx, slot; @@ -695,7 +695,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; @@ -720,7 +720,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval, local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 5a4d0fc..c3b2a7e 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -302,8 +302,8 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; if (!rtas_error_extended(h)) { - memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); + memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); + errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); } else { int len, error_log_length; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index fe0cca4..365249c 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -155,7 +155,7 @@ int __init xics_smp_probe(void) void xics_teardown_cpu(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* * we have to reset the cppr index to 0 because we're -- cgit v0.10.2 From 9178ba294b6839eeff1a91bed95515d783f3ee6c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Oct 2014 16:01:09 +0200 Subject: powerpc: Convert power off logic to pm_power_off The generic Linux framework to power off the machine is a function pointer called pm_power_off. The trick about this pointer is that device drivers can potentially implement it rather than board files. Today on powerpc we set pm_power_off to invoke our generic full machine power off logic which then calls ppc_md.power_off to invoke machine specific power off. However, when we want to add a power off GPIO via the "gpio-poweroff" driver, this card house falls apart. That driver only registers itself if pm_power_off is NULL to ensure it doesn't override board specific logic. However, since we always set pm_power_off to the generic power off logic (which will just not power off the machine if no ppc_md.power_off call is implemented), we can't implement power off via the generic GPIO power off driver. To fix this up, let's get rid of the ppc_md.power_off logic and just always use pm_power_off as was intended. Then individual drivers such as the GPIO power off driver can implement power off logic via that function pointer. With this patch set applied and a few patches on top of QEMU that implement a power off GPIO on the virt e500 machine, I can successfully turn off my virtual machine after halt. Signed-off-by: Alexander Graf [mpe: Squash into one patch and update changelog based on cover letter] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 307347f..f15f15c 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -142,7 +142,6 @@ struct machdep_calls { #endif void (*restart)(char *cmd); - void (*power_off)(void); void (*halt)(void); void (*panic)(char *str); void (*cpu_die)(void); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1362cd6..44c8d03 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -139,8 +139,8 @@ void machine_restart(char *cmd) void machine_power_off(void) { machine_shutdown(); - if (ppc_md.power_off) - ppc_md.power_off(); + if (pm_power_off) + pm_power_off(); #ifdef CONFIG_SMP smp_send_stop(); #endif @@ -151,7 +151,7 @@ void machine_power_off(void) /* Used by the G5 thermal driver */ EXPORT_SYMBOL_GPL(machine_power_off); -void (*pm_power_off)(void) = machine_power_off; +void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off); void machine_halt(void) diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 58db9d0..c11ce65 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -94,7 +94,7 @@ static int avr_probe(struct i2c_client *client, { avr_i2c_client = client; ppc_md.restart = avr_reset_system; - ppc_md.power_off = avr_power_off_system; + pm_power_off = avr_power_off_system; return 0; } diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 3feffde..6af651e 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -212,6 +212,8 @@ static int __init efika_probe(void) DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; + pm_power_off = rtas_power_off; + return 1; } @@ -225,7 +227,6 @@ define_machine(efika) .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .set_rtc_time = rtas_set_rtc_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 463fa91e..15e8021 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -167,10 +167,10 @@ static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id) if (ret) goto err; - /* XXX: this is potentially racy, but there is no lock for ppc_md */ - if (!ppc_md.power_off) { + /* XXX: this is potentially racy, but there is no lock for pm_power_off */ + if (!pm_power_off) { glob_mcu = mcu; - ppc_md.power_off = mcu_power_off; + pm_power_off = mcu_power_off; dev_info(&client->dev, "will provide power-off service\n"); } @@ -197,7 +197,7 @@ static int mcu_remove(struct i2c_client *client) device_remove_file(&client->dev, &dev_attr_status); if (glob_mcu == mcu) { - ppc_md.power_off = NULL; + pm_power_off = NULL; glob_mcu = NULL; } diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index e56b89a..1f309cc 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -170,7 +170,7 @@ static int __init corenet_generic_probe(void) ppc_md.get_irq = ehv_pic_get_irq; ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; + pm_power_off = fsl_hv_halt; ppc_md.halt = fsl_hv_halt; #ifdef CONFIG_SMP /* diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 8162b04..e149c9e 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -120,7 +120,7 @@ static int gpio_halt_probe(struct platform_device *pdev) /* Register our halt function */ ppc_md.halt = gpio_halt_cb; - ppc_md.power_off = gpio_halt_cb; + pm_power_off = gpio_halt_cb; printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d" " irq).\n", gpio, trigger, irq); @@ -137,7 +137,7 @@ static int gpio_halt_remove(struct platform_device *pdev) free_irq(irq, halt_node); ppc_md.halt = NULL; - ppc_md.power_off = NULL; + pm_power_off = NULL; gpio_free(gpio); diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index 34e8ce2..90be8ec 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -142,6 +142,7 @@ static int __init celleb_probe_beat(void) powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS | FW_FEATURE_BEAT | FW_FEATURE_LPAR; hpte_init_beat_v3(); + pm_power_off = beat_power_off; return 1; } @@ -190,6 +191,7 @@ static int __init celleb_probe_native(void) powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -204,7 +206,6 @@ define_machine(celleb_beat) { .setup_arch = celleb_setup_arch_beat, .show_cpuinfo = celleb_show_cpuinfo, .restart = beat_restart, - .power_off = beat_power_off, .halt = beat_halt, .get_rtc_time = beat_get_rtc_time, .set_rtc_time = beat_set_rtc_time, @@ -230,7 +231,6 @@ define_machine(celleb_native) { .setup_arch = celleb_setup_arch_native, .show_cpuinfo = celleb_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c index 6e3409d..d328140 100644 --- a/arch/powerpc/platforms/cell/qpace_setup.c +++ b/arch/powerpc/platforms/cell/qpace_setup.c @@ -127,6 +127,7 @@ static int __init qpace_probe(void) return 0; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -137,7 +138,6 @@ define_machine(qpace) { .setup_arch = qpace_setup_arch, .show_cpuinfo = qpace_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 6ae25fb..d62aa98 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -259,6 +259,7 @@ static int __init cell_probe(void) return 0; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -269,7 +270,6 @@ define_machine(cell) { .setup_arch = cell_setup_arch, .show_cpuinfo = cell_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 5b77b19..860a59e 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -585,6 +585,8 @@ static int __init chrp_probe(void) DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; + pm_power_off = rtas_power_off; + return 1; } @@ -597,7 +599,6 @@ define_machine(chrp) { .show_cpuinfo = chrp_show_cpuinfo, .init_IRQ = chrp_init_IRQ, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .time_init = chrp_time_init, .set_rtc_time = chrp_set_rtc_time, diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index bd4ba5d..fe0ed6e 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -67,6 +67,8 @@ static int __init gamecube_probe(void) if (!of_flat_dt_is_compatible(dt_root, "nintendo,gamecube")) return 0; + pm_power_off = gamecube_power_off; + return 1; } @@ -80,7 +82,6 @@ define_machine(gamecube) { .probe = gamecube_probe, .init_early = gamecube_init_early, .restart = gamecube_restart, - .power_off = gamecube_power_off, .halt = gamecube_halt, .init_IRQ = flipper_pic_probe, .get_irq = flipper_pic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 168e1d8..540eeb5 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -147,6 +147,9 @@ static int __init linkstation_probe(void) if (!of_flat_dt_is_compatible(root, "linkstation")) return 0; + + pm_power_off = linkstation_power_off; + return 1; } @@ -158,7 +161,6 @@ define_machine(linkstation){ .show_cpuinfo = linkstation_show_cpuinfo, .get_irq = mpic_get_irq, .restart = linkstation_restart, - .power_off = linkstation_power_off, .halt = linkstation_halt, .calibrate_decr = generic_calibrate_decr, }; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 388e29b..352592d 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -211,6 +211,8 @@ static int __init wii_probe(void) if (!of_flat_dt_is_compatible(dt_root, "nintendo,wii")) return 0; + pm_power_off = wii_power_off; + return 1; } @@ -226,7 +228,6 @@ define_machine(wii) { .init_early = wii_init_early, .setup_arch = wii_setup_arch, .restart = wii_restart, - .power_off = wii_power_off, .halt = wii_halt, .init_IRQ = wii_pic_probe, .get_irq = flipper_pic_get_irq, diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index cb1b0b3..56b85cd 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -169,7 +169,7 @@ static void __init maple_use_rtas_reboot_and_halt_if_present(void) if (rtas_service_present("system-reboot") && rtas_service_present("power-off")) { ppc_md.restart = rtas_restart; - ppc_md.power_off = rtas_power_off; + pm_power_off = rtas_power_off; ppc_md.halt = rtas_halt; } } @@ -312,6 +312,7 @@ static int __init maple_probe(void) alloc_dart_table(); hpte_init_native(); + pm_power_off = maple_power_off; return 1; } @@ -325,7 +326,6 @@ define_machine(maple) { .pci_irq_fixup = maple_pci_irq_fixup, .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, .restart = maple_restart, - .power_off = maple_power_off, .halt = maple_halt, .get_boot_time = maple_get_boot_time, .set_rtc_time = maple_set_rtc_time, diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index b127a29..713d36d 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -632,6 +632,8 @@ static int __init pmac_probe(void) smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL); #endif /* CONFIG_PMAC_SMU */ + pm_power_off = pmac_power_off; + return 1; } @@ -663,7 +665,6 @@ define_machine(powermac) { .get_irq = NULL, /* changed later */ .pci_irq_fixup = pmac_pci_irq_fixup, .restart = pmac_restart, - .power_off = pmac_power_off, .halt = pmac_halt, .time_init = pmac_time_init, .get_boot_time = pmac_get_boot_time, diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 3f9546d..941831d 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -268,7 +268,7 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.get_rtc_time = opal_get_rtc_time; ppc_md.set_rtc_time = opal_set_rtc_time; ppc_md.restart = pnv_restart; - ppc_md.power_off = pnv_power_off; + pm_power_off = pnv_power_off; ppc_md.halt = pnv_halt; ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; @@ -285,7 +285,7 @@ static void __init pnv_setup_machdep_rtas(void) ppc_md.set_rtc_time = rtas_set_rtc_time; } ppc_md.restart = rtas_restart; - ppc_md.power_off = rtas_power_off; + pm_power_off = rtas_power_off; ppc_md.halt = rtas_halt; } #endif /* CONFIG_PPC_POWERNV_RTAS */ diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 3f509f8..009a200 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -248,6 +248,7 @@ static int __init ps3_probe(void) ps3_mm_init(); ps3_mm_vas_create(&htab_size); ps3_hpte_init(htab_size); + pm_power_off = ps3_power_off; DBG(" <- %s:%d\n", __func__, __LINE__); return 1; @@ -278,7 +279,6 @@ define_machine(ps3) { .calibrate_decr = ps3_calibrate_decr, .progress = ps3_progress, .restart = ps3_restart, - .power_off = ps3_power_off, .halt = ps3_halt, #if defined(CONFIG_KEXEC) .kexec_cpu_down = ps3_kexec_cpu_down, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 125c589..db0fc0c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -659,6 +659,34 @@ static void __init pSeries_init_early(void) pr_debug(" <- pSeries_init_early()\n"); } +/** + * pseries_power_off - tell firmware about how to power off the system. + * + * This function calls either the power-off rtas token in normal cases + * or the ibm,power-off-ups token (if present & requested) in case of + * a power failure. If power-off token is used, power on will only be + * possible with power button press. If ibm,power-off-ups token is used + * it will allow auto poweron after power is restored. + */ +static void pseries_power_off(void) +{ + int rc; + int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups"); + + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_POWER_OFF); + + if (rtas_poweron_auto == 0 || + rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { + rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1); + printk(KERN_INFO "RTAS power-off returned %d\n", rc); + } else { + rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); + printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc); + } + for (;;); +} + /* * Called very early, MMU is off, device-tree isn't unflattened */ @@ -741,6 +769,8 @@ static int __init pSeries_probe(void) else hpte_init_native(); + pm_power_off = pseries_power_off; + pr_debug("Machine is%s LPAR !\n", (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); @@ -754,34 +784,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) return PCI_PROBE_NORMAL; } -/** - * pSeries_power_off - tell firmware about how to power off the system. - * - * This function calls either the power-off rtas token in normal cases - * or the ibm,power-off-ups token (if present & requested) in case of - * a power failure. If power-off token is used, power on will only be - * possible with power button press. If ibm,power-off-ups token is used - * it will allow auto poweron after power is restored. - */ -static void pSeries_power_off(void) -{ - int rc; - int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups"); - - if (rtas_flash_term_hook) - rtas_flash_term_hook(SYS_POWER_OFF); - - if (rtas_poweron_auto == 0 || - rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { - rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1); - printk(KERN_INFO "RTAS power-off returned %d\n", rc); - } else { - rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); - printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc); - } - for (;;); -} - #ifndef CONFIG_PCI void pSeries_final_fixup(void) { } #endif @@ -796,7 +798,6 @@ define_machine(pseries) { .pcibios_fixup = pSeries_final_fixup, .pci_probe_mode = pSeries_pci_probe_mode, .restart = rtas_restart, - .power_off = pSeries_power_off, .halt = rtas_halt, .panic = rtas_os_term, .get_boot_time = rtas_get_boot_time, diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index ffd1169..1e04568 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -238,7 +238,7 @@ void fsl_hv_restart(char *cmd) /* * Halt the current partition * - * This function should be assigned to the ppc_md.power_off and ppc_md.halt + * This function should be assigned to the pm_power_off and ppc_md.halt * function pointers, to shut down the partition when we're running under * the Freescale hypervisor. */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index b988b5a..506d256 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -981,7 +981,8 @@ static void bootcmds(void) else if (cmd == 'h') ppc_md.halt(); else if (cmd == 'p') - ppc_md.power_off(); + if (pm_power_off) + pm_power_off(); } static int cpu_cmd(void) -- cgit v0.10.2 From 7b051f665c32db3339ac9b6d9806ef508d09647f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 13 Oct 2014 20:27:15 +1100 Subject: powerpc: Use probe_kernel_address in show_instructions We really don't want to take a pagefault in show_instructions, so use probe_kernel_address instead of __get_user. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 91e132b..8c2691e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -37,9 +37,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -921,12 +921,8 @@ static void show_instructions(struct pt_regs *regs) pc = (unsigned long)phys_to_virt(pc); #endif - /* We use __get_user here *only* to avoid an OOPS on a - * bad address because the pc *should* only be a - * kernel address. - */ if (!__kernel_text_address(pc) || - __get_user(instr, (unsigned int __user *)pc)) { + probe_kernel_address((unsigned int __user *)pc, instr)) { printk(KERN_CONT "XXXXXXXX "); } else { if (regs->nip == pc) -- cgit v0.10.2 From adb7cd732292b06ee964d9f6090b17c70bd8bd3d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 14 Oct 2014 11:40:26 +1100 Subject: powerpc/pci: Quieten unset I/O resource warning Newer POWER designs do not implement PCI I/O space, so we expect to see a number of these. Reduce the severity of the warning so it doesn't mask other real issues. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e5dad9a..bc2dab5 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1464,7 +1464,7 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, res = &hose->io_resource; if (!res->flags) { - printk(KERN_WARNING "PCI: I/O resource not set for host" + pr_info("PCI: I/O resource not set for host" " bridge %s (domain %d)\n", hose->dn->full_name, hose->global_number); } else { -- cgit v0.10.2 From 64ff91ff85b56321e65b476e335955af9bed2c66 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 14 Oct 2014 12:24:35 +1100 Subject: powerpc: Remove ppc64_boot_msg ppc64_boot_msg is meant to be a boot debug aid, but is only used in one spot. Get rid of it, and save ourseleves a couple of lines in the kernel log buffer. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index f15f15c..4a6511f 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -342,16 +342,6 @@ extern sys_ctrler_t sys_ctrler; #endif /* CONFIG_PPC_PMAC */ - -/* Functions to produce codes on the leds. - * The SRC code should be unique for the message category and should - * be limited to the lower 24 bits (the upper 8 are set by these funcs), - * and (for boot & dump) should be sorted numerically in the order - * the events occur. - */ -/* Print a boot progress message. */ -void ppc64_boot_msg(unsigned int src, const char *msg); - static inline void log_error(char *buf, unsigned int err_type, int fatal) { if (ppc_md.log_error) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4f3cfe1..615aa90 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -665,8 +665,6 @@ static void __init emergency_stack_init(void) */ void __init setup_arch(char **cmdline_p) { - ppc64_boot_msg(0x12, "Setup Arch"); - *cmdline_p = boot_command_line; /* @@ -711,33 +709,6 @@ void __init setup_arch(char **cmdline_p) if ((unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", (unsigned long)_stext); - - ppc64_boot_msg(0x15, "Setup Done"); -} - - -/* ToDo: do something useful if ppc_md is not yet setup. */ -#define PPC64_LINUX_FUNCTION 0x0f000000 -#define PPC64_IPL_MESSAGE 0xc0000000 -#define PPC64_TERM_MESSAGE 0xb0000000 - -static void ppc64_do_msg(unsigned int src, const char *msg) -{ - if (ppc_md.progress) { - char buf[128]; - - sprintf(buf, "%08X\n", src); - ppc_md.progress(buf, 0); - snprintf(buf, 128, "%s", msg); - ppc_md.progress(buf, 0); - } -} - -/* Print a boot progress message. */ -void ppc64_boot_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); - printk("[boot]%04x %s\n", src, msg); } #ifdef CONFIG_SMP -- cgit v0.10.2 From 16d0f5c4af76b0c3424290937bf1ac22adf439b1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 14 Oct 2014 22:17:47 +1100 Subject: powerpc: Remove ppc_md.remove_memory We have an extra level of indirection on memory hot remove which is not matched on memory hot add. Memory hotplug is book3s only, so there is no need for it. This also enables means remove_memory() (ie memory hot unplug) works on powernv. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 4a6511f..15c9150 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -291,10 +291,6 @@ struct machdep_calls { #ifdef CONFIG_ARCH_RANDOM int (*get_random_long)(unsigned long *v); #endif - -#ifdef CONFIG_MEMORY_HOTREMOVE - int (*remove_memory)(u64, u64); -#endif }; extern void e500_idle(void); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8ebaac7..2add0b7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -144,8 +145,17 @@ int arch_remove_memory(u64 start, u64 size) zone = page_zone(pfn_to_page(start_pfn)); ret = __remove_pages(zone, start_pfn, nr_pages); - if (!ret && (ppc_md.remove_memory)) - ret = ppc_md.remove_memory(start, size); + if (ret) + return ret; + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); + ret = remove_section_mapping(start, start + size); + + /* Ensure all vmalloc mappings are flushed in case they also + * hit that section of memory + */ + vm_unmap_aliases(); return ret; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 3c4c0dc..3cb256c 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -66,22 +65,6 @@ unsigned long pseries_memory_block_size(void) } #ifdef CONFIG_MEMORY_HOTREMOVE -static int pseries_remove_memory(u64 start, u64 size) -{ - int ret; - - /* Remove htab bolted mappings for this section of memory */ - start = (unsigned long)__va(start); - ret = remove_section_mapping(start, start + size); - - /* Ensure all vmalloc mappings are flushed in case they also - * hit that section of memory - */ - vm_unmap_aliases(); - - return ret; -} - static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long block_sz, start_pfn; @@ -262,10 +245,6 @@ static int __init pseries_memory_hotplug_init(void) if (firmware_has_feature(FW_FEATURE_LPAR)) of_reconfig_notifier_register(&pseries_mem_nb); -#ifdef CONFIG_MEMORY_HOTREMOVE - ppc_md.remove_memory = pseries_remove_memory; -#endif - return 0; } machine_device_initcall(pseries, pseries_memory_hotplug_init); -- cgit v0.10.2 From 7439b37e7579543226cb30eeefdf989bed391997 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:06 +0200 Subject: powerpc/8xx: exception InstructionAccess does not exist on MPC8xx Exception InstructionAccess does not exist on MPC8xx. No need to branch there from somewhere else. Handling can be done directly in InstructionTLBError Exception. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fafff8d..d30f703 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -234,15 +234,10 @@ DataAccess: EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. - * This is "never generated" by the MPC8xx. We jump to it for other - * translation errors. + * This is "never generated" by the MPC8xx. */ . = 0x400 InstructionAccess: - EXCEPTION_PROLOG - mr r4,r12 - mr r5,r9 - EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -382,7 +377,7 @@ InstructionTLBMiss: #endif mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 - b InstructionAccess + b InstructionTLBError . = 0x1200 DataStoreTLBMiss: @@ -477,7 +472,11 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - b InstructionAccess + EXCEPTION_PROLOG + mr r4,r12 + mr r5,r9 + /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ + EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to -- cgit v0.10.2 From 749137a2516aea627cbdd49140e60bb60d80f18e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:07 +0200 Subject: powerpc/8xx: DataAccess exception not generated by MPC8xx DataAccess exception is never generated by MPC8xx so do the job directly where it is used to avoid an unnecessary branching. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d30f703..464be24 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -219,19 +219,10 @@ MachineCheck: EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. - * This is "never generated" by the MPC8xx. We jump to it for other - * translation errors. + * This is "never generated" by the MPC8xx. */ . = 0x300 DataAccess: - EXCEPTION_PROLOG - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 - mfspr r4,SPRN_DAR - li r10,0x00f0 - mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ - EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. * This is "never generated" by the MPC8xx. @@ -491,7 +482,15 @@ DataTLBError: beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_EPILOG_0 - b DataAccess + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + li r10,0x00f0 + mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ + /* 0x300 is DataAccess exception, needed by bad_page_fault() */ + EXC_XFER_LITE(0x300, handle_page_fault) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) -- cgit v0.10.2 From 6cde2b6f399e7d68a4b482680850a077104f9068 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:08 +0200 Subject: powerpc/8xx: No need to restore registers and save them again. In DTLBError handler there is not need to restore r10, r11 and cr registers after fixing DAR as they are saved again to the same place just after. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 464be24..c061bc8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -481,8 +481,8 @@ DataTLBError: cmpwi cr0, r11, 0x00f0 beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ - EXCEPTION_EPILOG_0 - EXCEPTION_PROLOG + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) mr r5,r10 -- cgit v0.10.2 From cbc130f120d5e180a70a0fa4a8b4191e44548a87 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:08 +0200 Subject: powerpc/8xx: Use M_TW instead of M_TWB Use M_TW instead of M_TWB for storing Level 1 table address as M_TWB requires 4k aligned tables, which is only the case with 4k pages. Consequently, we have to calculate the level 1 table index by ourselves. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c061bc8..c78f7fd 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -278,8 +278,8 @@ SystemCall: . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction - * TLB. It is modelled after the example in the Motorola manual. The task - * switch loads the M_TWB register with the pointer to the first level table. + * TLB. The task switch loads the M_TW register with the pointer to the first + * level table. * If we discover there is no second level table (value is zero) or if there * is an invalid pte, we load that into the TLB, which causes another fault * into the TLB Error interrupt where we can handle such problems. @@ -301,7 +301,6 @@ InstructionTLBMiss: #endif DO_8xx_CPU6(0x3780, r3) mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -309,14 +308,17 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ + andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ +#endif + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ +#ifdef CONFIG_MODULES beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -377,18 +379,19 @@ DataStoreTLBMiss: #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andi. r11, r10, 0x0800 + andis. r11, r10, 0x8000 + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -527,12 +530,12 @@ FixupDAR:/* Entry point for dcbx workaround. */ andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ DO_8xx_CPU6(0x3780, r3) mtspr SPRN_MD_EPN, r10 - mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */ + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l - rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */ -3: lwz r11, 0(r11) /* Get the level 1 entry */ +3: rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + lwzx r11, r10, r11 /* Get the level 1 entry */ DO_8xx_CPU6(0x3b80, r3) mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ @@ -541,6 +544,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ lwz r3, 8(r0) /* restore r3 from memory */ #endif /* concat physical page address(r11) and page offset(r10) */ + mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 20, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ @@ -696,11 +700,11 @@ start_here: #ifdef CONFIG_8xx_CPU6 lis r4, cpu6_errata_word@h ori r4, r4, cpu6_errata_word@l - li r3, 0x3980 + li r3, 0x3f80 stw r3, 12(r4) lwz r3, 12(r4) #endif - mtspr SPRN_M_TWB, r6 + mtspr SPRN_M_TW, r6 lis r4,2f@h ori r4,r4,2f@l tophys(r4,r4) @@ -874,10 +878,10 @@ _GLOBAL(set_context) lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l tophys (r4, r4) - li r7, 0x3980 + li r7, 0x3f80 stw r7, 12(r6) lwz r7, 12(r6) - mtspr SPRN_M_TWB, r4 /* Update MMU base address */ + mtspr SPRN_M_TW, r4 /* Update MMU base address */ li r7, 0x3380 stw r7, 12(r6) lwz r7, 12(r6) @@ -885,7 +889,7 @@ _GLOBAL(set_context) #else mtspr SPRN_M_CASID,r3 /* Update context */ tophys (r4, r4) - mtspr SPRN_M_TWB, r4 /* and pgd */ + mtspr SPRN_M_TW, r4 /* and pgd */ #endif SYNC blr -- cgit v0.10.2 From 33fb845a6f019001b8ca3f532eb1a4de34547f42 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:08 +0200 Subject: powerpc/8xx: Don't use MD_TWC for walk MD_TWC can only be used properly with 4k pages. So lets calculate level 2 table index by ourselves. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c78f7fd..3eea29a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -299,8 +299,6 @@ InstructionTLBMiss: addi r11, r10, -0x1000 tlbie r11 #endif - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -328,10 +326,9 @@ InstructionTLBMiss: ori r11,r11,1 /* Set valid bit */ DO_8xx_CPU6(0x2b80, r3) mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ - lwz r10, 0(r11) /* Get the pte */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11, 22, 20, 29 /* Extract level 2 index */ + lwzx r10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT @@ -397,12 +394,13 @@ DataStoreTLBMiss: /* We have a pte table, so load fetch the pte from the table. */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r10, SPRN_MD_TWC /* ....and get the pte address */ + mfspr r10, SPRN_MD_EPN /* Get address of fault */ + /* Extract level 2 index */ + rlwinm r10, r10, 22, 20, 29 + rlwimi r10, r11, 0, 0, 19 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ + ori r11, r11, 1 /* Set valid bit in physical L2 page */ /* Insert the Guarded flag into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put @@ -528,18 +526,16 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10 mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ lwzx r11, r10, r11 /* Get the level 1 entry */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ - lwz r11, 0(r11) /* Get the pte */ + rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11, 22, 20, 29 /* Extract level 2 index */ + lwzx r11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) /* restore r3 from memory */ #endif -- cgit v0.10.2 From d14068035c3f6fd0d6514e061e4324a277be83e2 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: powerpc/8xx: Use PAGE size related consts For PAGE size related operations, use PAGE size consts in order to be able to use different page size in the futur. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3eea29a..e126adf 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -294,9 +294,9 @@ InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 - addi r11, r10, 0x1000 + addi r11, r10, PAGE_SIZE tlbie r11 - addi r11, r10, -0x1000 + addi r11, r10, -PAGE_SIZE tlbie r11 #endif @@ -315,7 +315,8 @@ InstructionTLBMiss: ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -327,7 +328,8 @@ InstructionTLBMiss: DO_8xx_CPU6(0x2b80, r3) mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - rlwinm r11, r11, 22, 20, 29 /* Extract level 2 index */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzx r10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP @@ -387,7 +389,8 @@ DataStoreTLBMiss: lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -396,8 +399,8 @@ DataStoreTLBMiss: */ mfspr r10, SPRN_MD_EPN /* Get address of fault */ /* Extract level 2 index */ - rlwinm r10, r10, 22, 20, 29 - rlwimi r10, r11, 0, 0, 19 /* Add level 2 base */ + rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ ori r11, r11, 1 /* Set valid bit in physical L2 page */ @@ -530,18 +533,20 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l -3: rlwinm r10, r10, 12, 20, 29 /* Extract level 1 index */ + /* Extract level 1 index */ +3: rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - rlwinm r11, r11, 22, 20, 29 /* Extract level 2 index */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzx r11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) /* restore r3 from memory */ #endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 - rlwimi r11, r10, 0, 20, 31 + rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, @@ -917,12 +922,13 @@ set_dec_cpu6: .globl sdata sdata: .globl empty_zero_page + .align PAGE_SHIFT empty_zero_page: - .space 4096 + .space PAGE_SIZE .globl swapper_pg_dir swapper_pg_dir: - .space 4096 + .space PGD_TABLE_SIZE /* Room for two PTE table poiners, usually the kernel and current user * pointer to their respective root page table (pgdir). -- cgit v0.10.2 From ac21951fa8a356e2aab6e93a61aa99b561100e67 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: powerpc/8xx: Const for TLB RPN forced value Value 0x00f0 is used to force bits in TLB level 2 entry. This value is linked to the page size and will vary when we change the page size. Lets define a const for it in order to have it at only one place. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e126adf..38efa86 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -40,6 +40,13 @@ #else #define DO_8xx_CPU6(val, reg) #endif + +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#define RPN_PATTERN 0x00f0 + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -211,7 +218,7 @@ MachineCheck: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -239,7 +246,7 @@ Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -343,7 +350,7 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - li r11, 0x00f0 + li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ @@ -447,7 +454,7 @@ DataStoreTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, 0x00f0 +2: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -482,7 +489,7 @@ DataTLBError: EXCEPTION_PROLOG_0 mfspr r11, SPRN_DAR - cmpwi cr0, r11, 0x00f0 + cmpwi cr0, r11, RPN_PATTERN beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 @@ -491,7 +498,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ stw r10,_DSISR(r11) mr r5,r10 mfspr r4,SPRN_DAR - li r10,0x00f0 + li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) -- cgit v0.10.2 From 959d6173b5cccceff47cc2d25feeaac2f96df0e0 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: powerpc/8xx: Implement 16k pages This patch activates the handling of 16k pages on the MPC8xx. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 88eace4..abb8709 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -549,7 +549,7 @@ config PPC_4K_PAGES bool "4k page size" config PPC_16K_PAGES - bool "16k page size" if 44x + bool "16k page size" if 44x || PPC_8xx config PPC_64K_PAGES bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64 diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 3d11d3c..986b9e1 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -56,6 +56,7 @@ * additional information from the MI_EPN, and MI_TWC registers. */ #define SPRN_MI_RPN 790 +#define MI_SPS16K 0x00000008 /* Small page size (0 = 4k, 1 = 16k) */ /* Define an RPN value for mapping kernel memory to large virtual * pages for boot initialization. This has real page number of 0, @@ -129,6 +130,7 @@ * additional information from the MD_EPN, and MD_TWC registers. */ #define SPRN_MD_RPN 798 +#define MD_SPS16K 0x00000008 /* Small page size (0 = 4k, 1 = 16k) */ /* This is a temporary storage register that could be used to save * a processor working register during a tablewalk. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 38efa86..84b0b97 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -45,7 +45,11 @@ * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. */ +#ifdef CONFIG_PPC_16K_PAGES +#define RPN_PATTERN (0x00f0 | MD_SPS16K) +#else #define RPN_PATTERN 0x00f0 +#endif __HEAD _ENTRY(_stext); -- cgit v0.10.2 From d3e40262e7d05236bf4c2c4fdf007589ba8af97a Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: powerpc/8xx: Better readibility of ERRATA CPU6 handling This patch hiddes that SPR address needed for CPU6 ERRATA handling in the macro. Then we don't have to worry about this address directly in the code. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 84b0b97..6e91241 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,12 +33,19 @@ /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 -#define DO_8xx_CPU6(val, reg) \ - li reg, val; \ - stw reg, 12(r0); \ - lwz reg, 12(r0); +#define SPRN_MI_TWC_ADDR 0x2b80 +#define SPRN_MI_RPN_ADDR 0x2d80 +#define SPRN_MD_TWC_ADDR 0x3b80 +#define SPRN_MD_RPN_ADDR 0x3d80 + +#define MTSPR_CPU6(spr, reg, treg) \ + li treg, spr##_ADDR; \ + stw treg, 12(r0); \ + lwz treg, 12(r0); \ + mtspr spr, reg #else -#define DO_8xx_CPU6(val, reg) +#define MTSPR_CPU6(spr, reg, treg) \ + mtspr spr, reg #endif /* @@ -336,8 +343,7 @@ InstructionTLBMiss: * for this "segment." */ ori r11,r11,1 /* Set valid bit */ - DO_8xx_CPU6(0x2b80, r3) - mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -356,8 +362,7 @@ InstructionTLBMiss: */ li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ - DO_8xx_CPU6(0x2d80, r3) - mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ + MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 @@ -426,8 +431,7 @@ DataStoreTLBMiss: * It is bit 25 in the Linux PTE and bit 30 in the TWC */ rlwimi r11, r10, 32-5, 30, 30 - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 + MTSPR_CPU6(SPRN_MD_TWC, r11, r3) /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: @@ -460,8 +464,7 @@ DataStoreTLBMiss: */ 2: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ - DO_8xx_CPU6(0x3d80, r3) - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 -- cgit v0.10.2 From 4094f28f90adab007eca9babf28f606a40a83032 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: powerpc/8xx: set PTE bit 22 off TLBmiss No need to re-set this bit at each TLB miss. Let's set it in the PTE. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 945e47a..234e07c 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -170,6 +170,25 @@ static inline unsigned long pte_update(pte_t *p, #ifdef PTE_ATOMIC_UPDATES unsigned long old, tmp; +#ifdef CONFIG_PPC_8xx + unsigned long tmp2; + + __asm__ __volatile__("\ +1: lwarx %0,0,%4\n\ + andc %1,%0,%5\n\ + or %1,%1,%6\n\ + /* 0x200 == Extended encoding, bit 22 */ \ + /* Bit 22 has to be 1 if neither _PAGE_USER nor _PAGE_RW are set */ \ + rlwimi %1,%1,32-2,0x200\n /* get _PAGE_USER */ \ + rlwinm %3,%1,32-1,0x200\n /* get _PAGE_RW */ \ + or %1,%3,%1\n\ + xori %1,%1,0x200\n" +" stwcx. %1,0,%4\n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2) + : "r" (p), "r" (clr), "r" (set), "m" (*p) + : "cc" ); +#else /* CONFIG_PPC_8xx */ __asm__ __volatile__("\ 1: lwarx %0,0,%3\n\ andc %1,%0,%4\n\ @@ -180,6 +199,7 @@ static inline unsigned long pte_update(pte_t *p, : "=&r" (old), "=&r" (tmp), "=m" (*p) : "r" (p), "r" (clr), "r" (set), "m" (*p) : "cc" ); +#endif /* CONFIG_PPC_8xx */ #else /* PTE_ATOMIC_UPDATES */ unsigned long old = pte_val(*p); *p = __pte((old & ~clr) | set); diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index d44826e..daa4616 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -48,19 +48,22 @@ */ #define _PAGE_RW 0x0400 /* lsb PP bits, inverted in HW */ #define _PAGE_USER 0x0800 /* msb PP bits */ +/* set when neither _PAGE_USER nor _PAGE_RW are set */ +#define _PAGE_KNLRO 0x0200 #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c -#define _PTE_NONE_MASK _PAGE_ACCESSED +#define _PTE_NONE_MASK _PAGE_KNLRO /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED) +#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_KNLRO) #define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6e91241..8d6e683 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -447,14 +447,8 @@ DataStoreTLBMiss: and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif - /* Honour kernel RO, User NA */ - /* 0x200 == Extended encoding, bit 22 */ - rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ - /* r11 = (r10 & _PAGE_RW) >> 1 */ - rlwinm r11, r10, 32-1, 0x200 - or r10, r11, r10 - /* invert RW and 0x200 bits */ - xori r10, r10, _PAGE_RW | 0x200 + /* invert RW */ + xori r10, r10, _PAGE_RW /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 22 and 28 must be clear. -- cgit v0.10.2 From c9a803fb17bcec0e7527dc8fa055e56a9691abbb Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:09 +0200 Subject: powerpc/8xx: _PMD_PRESENT already set in level 1 entries When a PMD entry is valid, _PMD_PRESENT is set. Therefore, forcing that bit during TLB loading is useless. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 8d6e683..46b47e1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -342,7 +342,6 @@ InstructionTLBMiss: /* We have a pte table, so load the MI_TWC with the attributes * for this "segment." */ - ori r11,r11,1 /* Set valid bit */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ @@ -419,7 +418,6 @@ DataStoreTLBMiss: rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ /* Insert the Guarded flag into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put -- cgit v0.10.2 From b0168eb97b8b02594f47ce44faf1502f79e540df Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:10 +0200 Subject: powerpc/8xx: Don't restore regs to save them again. There is not need to restore r10, r11 and cr registers at this end of ITLBmiss handler as they are saved again to the same place in ITLBError handler we are jumping to. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 46b47e1..330d544 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -383,8 +383,7 @@ InstructionTLBMiss: lwz r3, 8(r0) #endif mfspr r10, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 - b InstructionTLBError + b InstructionTLBError1 . = 0x1200 DataStoreTLBMiss: @@ -473,7 +472,10 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - EXCEPTION_PROLOG + EXCEPTION_PROLOG_0 +InstructionTLBError1: + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 mr r4,r12 mr r5,r9 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ -- cgit v0.10.2 From 83c17ba35e0306e671b5c9ab622535f23a9a3e78 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:10 +0200 Subject: powerpc/8xx: Use DAR to save r3 for CPU6 ERRATA As we are not using anymore DAR to save registers, it is now available for saving the r3 register used for CPU6 ERRATA handling. Therefore we can remove the major hack which was to use memory location 0 to save r3. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 330d544..acf6d7e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -83,13 +83,6 @@ _ENTRY(_start); * 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to * the "internal" processor registers before MMU_init is called. * - * The TLB code currently contains a major hack. Since I use the condition - * code register, I have to save and restore it. I am out of registers, so - * I just store it in memory location 0 (the TLB handlers are not reentrant). - * To avoid making any decisions, I need to use the "segment" valid bit - * in the first level table, but that would require many changes to the - * Linux page directory/table functions that I don't want to do right now. - * * -- Dan */ .globl __start @@ -306,7 +299,7 @@ SystemCall: */ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 @@ -351,7 +344,10 @@ InstructionTLBMiss: #ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT + li r11, RPN_PATTERN bne- cr0, 2f +#else + li r11, RPN_PATTERN #endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 21 and 28 must be clear. @@ -359,28 +355,29 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 rfi 2: - mfspr r11, SPRN_SRR1 + mfspr r10, SPRN_SRR1 /* clear all error bits as TLB Miss * sets a few unconditionally */ - rlwinm r11, r11, 0, 0xffff - mtspr SPRN_SRR1, r11 + rlwinm r10, r10, 0, 0xffff + mtspr SPRN_SRR1, r10 /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 b InstructionTLBError1 @@ -388,7 +385,7 @@ InstructionTLBMiss: . = 0x1200 DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 @@ -459,7 +456,7 @@ DataStoreTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 @@ -531,7 +528,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ @@ -550,7 +547,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzx r11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ + mfspr r3, SPRN_DAR #endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 -- cgit v0.10.2 From c51a6821bdbc9068adda93b3b8ee65df8e4642a6 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 19 Sep 2014 10:36:10 +0200 Subject: powerpc/8xx: Invalidate non present TLB as early as possible 8xx sometimes need to load a invalid/non-present TLBs in it DTLB asm handler. These must be invalidated separaly as linux mm doesn't. Commit 5efab4a02c89c252fb4cce097aafde5f8208dbfe was invalidating them in arch/powerpc/mm/fault.c. This patch does the invalidation earlier in order to free the TLB as soon as possible. This also has the advantage of removing some 8xx specific code from fault.c Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index acf6d7e..d99aac0 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -475,8 +475,11 @@ InstructionTLBError1: EXCEPTION_PROLOG_2 mr r4,r12 mr r5,r9 + andis. r10,r5,0x4000 + beq+ 1f + tlbie r4 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ - EXC_XFER_LITE(0x400, handle_page_fault) +1: EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -492,11 +495,13 @@ DataTLBError: DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) mfspr r4,SPRN_DAR - li r10,RPN_PATTERN + andis. r10,r5,0x4000 + beq+ 1f + tlbie r4 +1: li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 08d659a..eb79907 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -43,7 +43,6 @@ #include #include #include -#include #include "icswx.h" @@ -380,12 +379,6 @@ good_area: goto bad_area; #endif /* CONFIG_6xx */ #if defined(CONFIG_8xx) - /* 8xx sometimes need to load a invalid/non-present TLBs. - * These must be invalidated separately as linux mm don't. - */ - if (error_code & 0x40000000) /* no translation? */ - _tlbil_va(address, 0, 0, 0); - /* The MPC8xx seems to always set 0x80000000, which is * "undefined". Of those that can be set, this is the only * one which seems bad. -- cgit v0.10.2 From 6f2ce34dd7a2cd34b937a149e5513edd9d9ba724 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 24 Sep 2014 10:06:19 +0200 Subject: powerpc/8xx: Remove Kconfig symbol FADS Commit 39eb56da2b53 ("pcmcia: Remove m8xx_pcmcia driver") removed the only driver that used CONFIG_FADS. Setting the Kconfig symbol FADS is pointless since that commit. Remove it. Signed-off-by: Paul Bolle Signed-off-by: Scott Wood diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index bd6f1a1..1572504 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -1,6 +1,3 @@ -config FADS - bool - config CPM1 bool select CPM @@ -13,7 +10,6 @@ choice config MPC8XXFADS bool "FADS" - select FADS config MPC86XADS bool "MPC86XADS" -- cgit v0.10.2 From 45c22ed7446fd6acdaaac673ccf5b63862a3e988 Mon Sep 17 00:00:00 2001 From: Ashish Kumar Date: Tue, 7 Oct 2014 18:04:36 +0530 Subject: powerpc/mpc85xx: Remove SPI and NAND partition from bsc9131rdb.dtsi * Run "mtdparts default" on u-boot to create dynamic partitions * Or use dynamic mtd partition with the help of bootargs in u-boot Append bootargs with: "mtdparts=ff800000.flash:1m(nand_uboot),512K(nand_dtb),8m(nand_kernel),-(fs);\ spiff707000.0:1m(spi_uboot),4m(spi_kernel),512k(spi_dtb),-(fs)'" Signed-off-by: Ashish Kumar Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi b/arch/powerpc/boot/dts/bsc9131rdb.dtsi index 9e6c013..45efcba 100644 --- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi +++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi @@ -40,31 +40,6 @@ compatible = "fsl,ifc-nand"; reg = <0x0 0x0 0x4000>; - partition@0 { - /* This location must not be altered */ - /* 3MB for u-boot Bootloader Image */ - reg = <0x0 0x00300000>; - label = "NAND U-Boot Image"; - read-only; - }; - - partition@300000 { - /* 1MB for DTB Image */ - reg = <0x00300000 0x00100000>; - label = "NAND DTB Image"; - }; - - partition@400000 { - /* 8MB for Linux Kernel Image */ - reg = <0x00400000 0x00800000>; - label = "NAND Linux Kernel Image"; - }; - - partition@c00000 { - /* Rest space for Root file System Image */ - reg = <0x00c00000 0x07400000>; - label = "NAND RFS Image"; - }; }; }; @@ -82,31 +57,6 @@ reg = <0>; spi-max-frequency = <50000000>; - /* 512KB for u-boot Bootloader Image */ - partition@0 { - reg = <0x0 0x00080000>; - label = "SPI Flash U-Boot Image"; - read-only; - }; - - /* 512KB for DTB Image */ - partition@80000 { - reg = <0x00080000 0x00080000>; - label = "SPI Flash DTB Image"; - }; - - /* 4MB for Linux Kernel Image */ - partition@100000 { - reg = <0x00100000 0x00400000>; - label = "SPI Flash Kernel Image"; - }; - - /*11MB for RFS Image */ - partition@500000 { - reg = <0x00500000 0x00B00000>; - label = "SPI Flash RFS Image"; - }; - }; }; -- cgit v0.10.2 From 19bc4808f9ecdef9ca3f3b3807a5228b00d74f83 Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Thu, 30 Oct 2014 11:15:47 +0200 Subject: powerpc/fsl: Added rcw registers to global utility registers The RCW registers are required for the future clock binding implementation. Signed-off-by: Igal Liberman Change-Id: Ic36dd8bc2959aa7f97fb6fd7bbb8420822fef0a9 Signed-off-by: Scott Wood diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h index 77ced0b..43b6bb1 100644 --- a/arch/powerpc/include/asm/fsl_guts.h +++ b/arch/powerpc/include/asm/fsl_guts.h @@ -68,7 +68,10 @@ struct ccsr_guts { u8 res0b4[0xc0 - 0xb4]; __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register Called 'elbcvselcr' on 86xx SOCs */ - u8 res0c4[0x224 - 0xc4]; + u8 res0c4[0x100 - 0xc4]; + __be32 rcwsr[16]; /* 0x.0100 - Reset Control Word Status registers + There are 16 registers */ + u8 res140[0x224 - 0x140]; __be32 iodelay1; /* 0x.0224 - IO delay control register 1 */ __be32 iodelay2; /* 0x.0228 - IO delay control register 2 */ u8 res22c[0x604 - 0x22c]; -- cgit v0.10.2 From 3b6b17900bfa3c1929741e720495beaa559b2aff Mon Sep 17 00:00:00 2001 From: Hongtao Jia Date: Wed, 5 Nov 2014 14:59:52 +0800 Subject: powerpc: Add ADT7461 to device tree for supported boards Including: T104xRDB T208xQDS B4QDS Signed-off-by: Jia Hongtao Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi index 8b47edc..bccc986 100644 --- a/arch/powerpc/boot/dts/b4qds.dtsi +++ b/arch/powerpc/boot/dts/b4qds.dtsi @@ -152,6 +152,17 @@ reg = <0x68>; }; }; + + i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x3>; + + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; }; }; diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi index 1cf0f3c..187add8 100644 --- a/arch/powerpc/boot/dts/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/t104xrdb.dtsi @@ -83,6 +83,13 @@ }; }; + i2c@118000 { + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; + i2c@118100 { pca9546@77 { compatible = "nxp,pca9546"; diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi index 555dc6e..5906183 100644 --- a/arch/powerpc/boot/dts/t208xqds.dtsi +++ b/arch/powerpc/boot/dts/t208xqds.dtsi @@ -169,6 +169,17 @@ shunt-resistor = <1000>; }; }; + + i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x3>; + + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; }; }; -- cgit v0.10.2 From 94701fcb2f085cef29b29051f21de7fc3718217a Mon Sep 17 00:00:00 2001 From: Hongtao Jia Date: Wed, 5 Nov 2014 14:59:53 +0800 Subject: powerpc: Add INA220 to device tree for supported boards Including: P3041DS P5020DS P5040DS B4QDS Signed-off-by: Jia Hongtao Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi index bccc986..e5bde0b 100644 --- a/arch/powerpc/boot/dts/b4qds.dtsi +++ b/arch/powerpc/boot/dts/b4qds.dtsi @@ -153,6 +153,18 @@ }; }; + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x2>; + + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + }; + i2c@3 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts index 2fed3bc..394ea9c 100644 --- a/arch/powerpc/boot/dts/p3041ds.dts +++ b/arch/powerpc/boot/dts/p3041ds.dts @@ -98,6 +98,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts index 2869fea..b7f3057 100644 --- a/arch/powerpc/boot/dts/p5020ds.dts +++ b/arch/powerpc/boot/dts/p5020ds.dts @@ -98,6 +98,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts index 860b5cc..7e04bf4 100644 --- a/arch/powerpc/boot/dts/p5040ds.dts +++ b/arch/powerpc/boot/dts/p5040ds.dts @@ -95,6 +95,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; -- cgit v0.10.2 From eaffcb0f1bebbcfd38ecc9bdca105f7123115ab1 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Thu, 6 Nov 2014 09:48:11 -0600 Subject: powerpc/dts: Factorize the clock control node Signed-off-by: Emil Medve Change-Id: I25ce24a25862b4ca460164159867abefe00ccdd1 Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/b4860emu.dts b/arch/powerpc/boot/dts/b4860emu.dts index 85646b4..2aa5cd3 100644 --- a/arch/powerpc/boot/dts/b4860emu.dts +++ b/arch/powerpc/boot/dts/b4860emu.dts @@ -193,9 +193,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "fsl/qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0"; - reg = <0xe1000 0x1000>; }; /include/ "fsl/qoriq-dma-0.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi index d678944..86161ae 100644 --- a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi @@ -80,33 +80,9 @@ compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0"; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4420-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi index 582381d..65100b9 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi @@ -124,33 +124,9 @@ compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0"; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 69ce102..efd74db 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -305,53 +305,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p2041-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi index cd63cb1..d7425ef 100644 --- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi @@ -332,53 +332,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p3041-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi index 12947cc..7005a4a 100644 --- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi @@ -352,35 +352,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p4080-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; pll2: pll2@840 { #clock-cells = <1>; @@ -398,24 +372,6 @@ clock-output-names = "pll3", "pll3-div2"; }; - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; - mux2: mux2@40 { #clock-cells = <0>; reg = <0x40 0x4>; diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi index 4c4a2b0..5583421 100644 --- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi @@ -337,53 +337,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; }; rcpm: global-utilities@e2000 { diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi index 67296fd..6e4cd6c 100644 --- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi @@ -297,53 +297,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p5040-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi new file mode 100644 index 0000000..4871048 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi @@ -0,0 +1,78 @@ +/* + * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ] + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +global-utilities@e1000 { + compatible = "fsl,qoriq-clockgen-1.0"; + ranges = <0x0 0xe1000 0x1000>; + reg = <0xe1000 0x1000>; + clock-frequency = <0>; + #address-cells = <1>; + #size-cells = <1>; + + sysclk: sysclk { + #clock-cells = <0>; + compatible = "fsl,qoriq-sysclk-1.0", "fixed-clock"; + clock-output-names = "sysclk"; + }; + pll0: pll0@800 { + #clock-cells = <1>; + reg = <0x800 0x4>; + compatible = "fsl,qoriq-core-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "pll0", "pll0-div2"; + }; + pll1: pll1@820 { + #clock-cells = <1>; + reg = <0x820 0x4>; + compatible = "fsl,qoriq-core-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "pll1", "pll1-div2"; + }; + mux0: mux0@0 { + #clock-cells = <0>; + reg = <0x0 0x4>; + compatible = "fsl,qoriq-core-mux-1.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; + clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; + clock-output-names = "cmux0"; + }; + mux1: mux1@20 { + #clock-cells = <0>; + reg = <0x20 0x4>; + compatible = "fsl,qoriq-core-mux-1.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; + clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; + clock-output-names = "cmux1"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi new file mode 100644 index 0000000..5d18d2a --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi @@ -0,0 +1,61 @@ +/* + * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ] + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +global-utilities@e1000 { + compatible = "fsl,qoriq-clockgen-2.0"; + ranges = <0x0 0xe1000 0x1000>; + reg = <0xe1000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + + sysclk: sysclk { + #clock-cells = <0>; + compatible = "fsl,qoriq-sysclk-2.0", "fixed-clock"; + clock-output-names = "sysclk"; + }; + pll0: pll0@800 { + #clock-cells = <1>; + reg = <0x800 0x4>; + compatible = "fsl,qoriq-core-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "pll0", "pll0-div2", "pll0-div4"; + }; + pll1: pll1@820 { + #clock-cells = <1>; + reg = <0x820 0x4>; + compatible = "fsl,qoriq-core-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "pll1", "pll1-div2", "pll1-div4"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 12e597e..15ae462 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -281,35 +281,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t1040-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk", "fixed-clock"; - }; - - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index aecee96..1ce91e3 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -305,34 +305,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t2080-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk", "fixed-clock"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index 7e2fc7c..0e96fca 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -368,34 +368,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; pll2: pll2@840 { #clock-cells = <1>; diff --git a/arch/powerpc/boot/dts/t4240emu.dts b/arch/powerpc/boot/dts/t4240emu.dts index bc12127a..decaf35 100644 --- a/arch/powerpc/boot/dts/t4240emu.dts +++ b/arch/powerpc/boot/dts/t4240emu.dts @@ -250,9 +250,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "fsl/qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0"; - reg = <0xe1000 0x1000>; }; /include/ "fsl/qoriq-dma-0.dtsi" -- cgit v0.10.2 From f1aa77c9703148fad7d819d9d764dae0cb82d141 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Thu, 6 Nov 2014 09:48:12 -0600 Subject: dt/bindings: qoriq-clock: Add binding for the platform PLL Signed-off-by: Emil Medve Change-Id: I7950afa9650d15ec7ce2cca89bb2a1e38586d4a5 Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/clock/qoriq-clock.txt b/Documentation/devicetree/bindings/clock/qoriq-clock.txt index 5666812..266ff9d 100644 --- a/Documentation/devicetree/bindings/clock/qoriq-clock.txt +++ b/Documentation/devicetree/bindings/clock/qoriq-clock.txt @@ -62,6 +62,8 @@ Required properties: It takes parent's clock-frequency as its clock. * "fsl,qoriq-sysclk-2.0": for input system clock (v2.0). It takes parent's clock-frequency as its clock. + * "fsl,qoriq-platform-pll-1.0" for the platform PLL clock (v1.0) + * "fsl,qoriq-platform-pll-2.0" for the platform PLL clock (v2.0) - #clock-cells: From common clock binding. The number of cells in a clock-specifier. Should be <0> for "fsl,qoriq-sysclk-[1,2].0" clocks, or <1> for "fsl,qoriq-core-pll-[1,2].0" clocks. @@ -128,8 +130,16 @@ Example for clock block and clock provider: clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; clock-output-names = "cmux1"; }; + + platform-pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; }; - } +}; Example for clock consumer: @@ -139,4 +149,4 @@ Example for clock consumer: clocks = <&mux0>; ... }; - } +}; -- cgit v0.10.2 From 58810cb7f66e47fce2e8945deeab5a4226e3975c Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Thu, 6 Nov 2014 09:48:13 -0600 Subject: powerpc/dts: Add node(s) for the platform PLL Signed-off-by: Emil Medve Change-Id: If76cd705a01813abe53396c1486bc13c4289ee92 Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi index 4871048..4ece1ed 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi @@ -75,4 +75,11 @@ global-utilities@e1000 { clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; clock-output-names = "cmux1"; }; + platform_pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; }; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi index 5d18d2a..48e0b6e 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi @@ -58,4 +58,11 @@ global-utilities@e1000 { clocks = <&sysclk>; clock-output-names = "pll1", "pll1-div2", "pll1-div4"; }; + platform_pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; }; -- cgit v0.10.2 From 10239733ee8617bac3f1c1769af43a88ed979324 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:33 +1000 Subject: powerpc: Remove bootmem allocator At the moment we transition from the memblock alloctor to the bootmem allocator. Gitting rid of the bootmem allocator removes a bunch of complicated code (most of which I owe the dubious honour of being responsible for writing). Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 88eace4..98e9c54 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -148,6 +148,7 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select NO_BOOTMEM config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 11ba86e..fbdf18c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -8,7 +8,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern int mem_init_done; /* set on boot once kmalloc can be called */ -extern int init_bootmem_done; /* set once bootmem is available */ extern unsigned long long memory_limit; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); @@ -24,7 +23,7 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) void check_for_initrd(void); -void do_init_bootmem(void); +void initmem_init(void); void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 07831ed..88a36e6 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -311,9 +311,8 @@ void __init setup_arch(char **cmdline_p) irqstack_early_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); + initmem_init(); + if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 615aa90..91d9cfa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -689,8 +689,7 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); + initmem_init(); sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 415a51b..3b31004 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -195,15 +195,6 @@ void __init MMU_init(void) memblock_set_current_limit(lowmem_end_addr); } -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - if (init_bootmem_done) - return alloc_bootmem_pages(PAGE_SIZE); - else - return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); -} - #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2add0b7..e076d19 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -61,7 +61,6 @@ #define CPU_FTR_NOEXECUTE 0 #endif -int init_bootmem_done; int mem_init_done; unsigned long long memory_limit; @@ -190,70 +189,22 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, } EXPORT_SYMBOL_GPL(walk_system_ram_range); -/* - * Initialize the bootmem system and give it all the memory we - * have available. If we are using highmem, we only put the - * lowmem into the bootmem system. - */ #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init do_init_bootmem(void) +void __init initmem_init(void) { - unsigned long start, bootmap_pages; - unsigned long total_pages; - struct memblock_region *reg; - int boot_mapsize; - max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; - total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; + min_low_pfn = MEMORY_START >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM - total_pages = total_lowmem >> PAGE_SHIFT; max_low_pfn = lowmem_end_addr >> PAGE_SHIFT; #endif - /* - * Find an area to use for the bootmem bitmap. Calculate the size of - * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. - * Add 1 additional page in case the address isn't page-aligned. - */ - bootmap_pages = bootmem_bootmap_pages(total_pages); - - start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); - - min_low_pfn = MEMORY_START >> PAGE_SHIFT; - boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); - /* Place all memblock_regions in the same node and merge contiguous * memblock_regions */ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); - /* Add all physical memory to the bootmem map, mark each area - * present. - */ -#ifdef CONFIG_HIGHMEM - free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) { - unsigned long top = reg->base + reg->size - 1; - if (top < lowmem_end_addr) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - else if (reg->base < lowmem_end_addr) { - unsigned long trunc_size = lowmem_end_addr - reg->base; - reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); - } - } -#else - free_bootmem_with_active_regions(0, max_pfn); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); -#endif /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); - - init_bootmem_done = 1; } /* mark pages that don't exist as nosave */ @@ -369,14 +320,6 @@ void __init paging_init(void) mark_nonram_nosave(); } -static void __init register_page_bootmem_info(void) -{ - int i; - - for_each_online_node(i) - register_page_bootmem_info_node(NODE_DATA(i)); -} - void __init mem_init(void) { /* @@ -389,7 +332,6 @@ void __init mem_init(void) swiotlb_init(0); #endif - register_page_bootmem_info(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); free_all_bootmem(); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b9d1dfd..2e9ad2d7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } -/* - * get_node_active_region - Return active region containing pfn - * Active range returned is empty if none found. - * @pfn: The page to return the region for - * @node_ar: Returned set to the active region containing @pfn - */ -static void __init get_node_active_region(unsigned long pfn, - struct node_active_region *node_ar) -{ - unsigned long start_pfn, end_pfn; - int i, nid; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - if (pfn >= start_pfn && pfn < end_pfn) { - node_ar->nid = nid; - node_ar->start_pfn = start_pfn; - node_ar->end_pfn = end_pfn; - break; - } - } -} - static void reset_numa_cpu_lookup_table(void) { unsigned int cpu; @@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void) } } -/* - * Allocate some memory, satisfying the memblock or bootmem allocator where - * required. nid is the preferred node and end is the physical address of - * the highest address in the node. - * - * Returns the virtual address of the memory. - */ -static void __init *careful_zallocation(int nid, unsigned long size, - unsigned long align, - unsigned long end_pfn) -{ - void *ret; - int new_nid; - unsigned long ret_paddr; - - ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); - - /* retry over all memory */ - if (!ret_paddr) - ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); - - if (!ret_paddr) - panic("numa.c: cannot allocate %lu bytes for node %d", - size, nid); - - ret = __va(ret_paddr); - - /* - * We initialize the nodes in numeric order: 0, 1, 2... - * and hand over control from the MEMBLOCK allocator to the - * bootmem allocator. If this function is called for - * node 5, then we know that all nodes <5 are using the - * bootmem allocator instead of the MEMBLOCK allocator. - * - * So, check the nid from which this allocation came - * and double check to see if we need to use bootmem - * instead of the MEMBLOCK. We don't free the MEMBLOCK memory - * since it would be useless. - */ - new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); - if (new_nid < nid) { - ret = __alloc_bootmem_node(NODE_DATA(new_nid), - size, align, 0); - - dbg("alloc_bootmem %p %lx\n", ret, size); - } - - memset(ret, 0, size); - return ret; -} - static struct notifier_block ppc64_numa_nb = { .notifier_call = cpu_numa_callback, .priority = 1 /* Must run before sched domains notifier. */ }; -static void __init mark_reserved_regions_for_nid(int nid) +/* Initialize NODE_DATA for a node on the local memory */ +static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { - struct pglist_data *node = NODE_DATA(nid); - struct memblock_region *reg; - - for_each_memblock(reserved, reg) { - unsigned long physbase = reg->base; - unsigned long size = reg->size; - unsigned long start_pfn = physbase >> PAGE_SHIFT; - unsigned long end_pfn = PFN_UP(physbase + size); - struct node_active_region node_ar; - unsigned long node_end_pfn = pgdat_end_pfn(node); - - /* - * Check to make sure that this memblock.reserved area is - * within the bounds of the node that we care about. - * Checking the nid of the start and end points is not - * sufficient because the reserved area could span the - * entire node. - */ - if (end_pfn <= node->node_start_pfn || - start_pfn >= node_end_pfn) - continue; - - get_node_active_region(start_pfn, &node_ar); - while (start_pfn < end_pfn && - node_ar.start_pfn < node_ar.end_pfn) { - unsigned long reserve_size = size; - /* - * if reserved region extends past active region - * then trim size to active region - */ - if (end_pfn > node_ar.end_pfn) - reserve_size = (node_ar.end_pfn << PAGE_SHIFT) - - physbase; - /* - * Only worry about *this* node, others may not - * yet have valid NODE_DATA(). - */ - if (node_ar.nid == nid) { - dbg("reserve_bootmem %lx %lx nid=%d\n", - physbase, reserve_size, node_ar.nid); - reserve_bootmem_node(NODE_DATA(node_ar.nid), - physbase, reserve_size, - BOOTMEM_DEFAULT); - } - /* - * if reserved region is contained in the active region - * then done. - */ - if (end_pfn <= node_ar.end_pfn) - break; - - /* - * reserved region extends past the active region - * get next active region that contains this - * reserved region - */ - start_pfn = node_ar.end_pfn; - physbase = start_pfn << PAGE_SHIFT; - size = size - reserve_size; - get_node_active_region(start_pfn, &node_ar); - } - } + u64 spanned_pages = end_pfn - start_pfn; + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; + + if (spanned_pages) + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, + (end_pfn << PAGE_SHIFT) - 1); + else + pr_info("Initmem setup node %d\n", nid); + + nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd = __va(nd_pa); + + /* report and initialize */ + pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != nid) + pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); + + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = spanned_pages; } - -void __init do_init_bootmem(void) +void __init initmem_init(void) { int nid, cpu; - min_low_pfn = 0; max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn; @@ -1064,64 +956,16 @@ void __init do_init_bootmem(void) else dump_numa_memory_topology(); + memblock_dump_all(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; - void *bootmem_vaddr; - unsigned long bootmap_pages; get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); - - /* - * Allocate the node structure node local if possible - * - * Be careful moving this around, as it relies on all - * previous nodes' bootmem to be initialized and have - * all reserved areas marked. - */ - NODE_DATA(nid) = careful_zallocation(nid, - sizeof(struct pglist_data), - SMP_CACHE_BYTES, end_pfn); - - dbg("node %d\n", nid); - dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; - NODE_DATA(nid)->node_start_pfn = start_pfn; - NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; - - if (NODE_DATA(nid)->node_spanned_pages == 0) - continue; - - dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); - dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); - - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_vaddr = careful_zallocation(nid, - bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end_pfn); - - dbg("bootmap_vaddr = %p\n", bootmem_vaddr); - - init_bootmem_node(NODE_DATA(nid), - __pa(bootmem_vaddr) >> PAGE_SHIFT, - start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - /* - * Be very careful about moving this around. Future - * calls to careful_zallocation() depend on this getting - * done correctly. - */ - mark_reserved_regions_for_nid(nid); + setup_node_data(nid, start_pfn, end_pfn); sparse_memory_present_with_active_regions(nid); } - init_bootmem_done = 1; - - /* - * Now bootmem is initialised we can create the node to cpumask - * lookup tables and setup the cpu callback to populate them. - */ setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cf11342..d545b12 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -100,12 +100,11 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add { pte_t *pte; extern int mem_init_done; - extern void *early_get_page(void); if (mem_init_done) { pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); } else { - pte = (pte_t *)early_get_page(); + pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) clear_page(pte); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c8d709a..cdb19ab 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -75,11 +75,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) { void *pt; - if (init_bootmem_done) - pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); - else - pt = __va(memblock_alloc_base(size, size, - __pa(MAX_DMA_ADDRESS))); + pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); memset(pt, 0, size); return pt; diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index e996e00..711f3d3 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -297,14 +297,13 @@ static void __init mpc512x_setup_diu(void) * and so negatively affect boot time. Instead we reserve the * already configured frame buffer area so that it won't be * destroyed. The starting address of the area to reserve and - * also it's length is passed to reserve_bootmem(). It will be + * also it's length is passed to memblock_reserve(). It will be * freed later on first open of fbdev, when splash image is not * needed any more. */ if (diu_shared_fb.in_use) { - ret = reserve_bootmem(diu_shared_fb.fb_phys, - diu_shared_fb.fb_len, - BOOTMEM_EXCLUSIVE); + ret = memblock_reserve(diu_shared_fb.fb_phys, + diu_shared_fb.fb_len); if (ret) { pr_err("%s: reserve bootmem failed\n", __func__); diu_shared_fb.in_use = false; -- cgit v0.10.2 From 14ed740957704e8768523899e0fa31972577bf65 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:34 +1000 Subject: powerpc: Remove some old bootmem related comments Now bootmem is gone from powerpc we can remove comments mentioning it. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 099f27e..6af05fc 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -696,10 +696,7 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* - * Ensure that total memory size is page-aligned, because otherwise - * mark_bootmem() gets upset. - */ + /* Ensure that total memory size is page-aligned. */ limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8b4c857..4af905e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1091,8 +1091,8 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) } /* - * Call early during boot, before mem init or bootmem, to retrieve the RTAS - * informations from the device-tree and allocate the RMO buffer for userland + * Call early during boot, before mem init, to retrieve the RTAS + * information from the device-tree and allocate the RMO buffer for userland * accesses. */ void __init rtas_initialize(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 4fdc27c..e64868c 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(kvm_release_hpt); * kvm_cma_reserve() - reserve area for kvm hash pagetable * * This function reserves memory from early allocator. It should be - * called by arch specific code once the early allocator (memblock or bootmem) + * called by arch specific code once the memblock allocator * has been activated and all other subsystems have already allocated/reserved * memory. */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8aa04f0..b460e72 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -276,7 +276,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { @@ -399,7 +399,7 @@ void __init reserve_hugetlb_gpages(void) #else /* !PPC_FSL_BOOK3E */ /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdb19ab..aa91737 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -109,10 +109,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags) __pgprot(flags))); } else { #ifdef CONFIG_PPC_MMU_NOHASH - /* Warning ! This will blow up if bootmem is not initialized - * which our ppc64 code is keen to do that, we'll need to - * fix it and/or be more careful - */ pgdp = pgd_offset_k(ea); #ifdef PUD_TABLE_SIZE if (pgd_none(*pgdp)) { -- cgit v0.10.2 From 68cf0d642f62267b960f947370539ff3582c4935 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:35 +1000 Subject: powerpc: Remove superfluous bootmem includes Lots of places included bootmem.h even when not using bootmem. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index c78e6da..cfa0f81 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -12,7 +12,6 @@ #undef DEBUG #include -#include #include #include #include diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8d87bb1..4509603 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 155013d..ba0f2d6 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 7c55b86..ce230da 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 88a36e6..ba3a3c3 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f174351..305eb0d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index e64868c..3f1bb5a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 3b31004..a10be66 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 3481556..10471f9 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index aa91737..e0c7185 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -51,6 +50,7 @@ #include #include #include +#include #include "mmu_decl.h" diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c index 844c0fa..9438bbe 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_epci.c +++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 4278acf..f223875 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index f7136aa..d3a1306 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 7e868cc..04702db 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index eba9cb1..db3803e 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b2187d0..ba74527 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index de40b48..8f37204 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -13,7 +13,6 @@ * */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index b50f978..b287337 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 89cec0e..c4648ad5 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 15dccd3..428de9d 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -16,7 +16,6 @@ #undef DEBUG #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 623d7fb..4ce8f54 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 22b5200..a59f289 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -22,7 +22,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c index df6e2fc..086aca6 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.c +++ b/arch/powerpc/sysdev/ppc4xx_pci.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 238a07b..b584deb 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index b2b87c3..543765e 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 9203393..7c37157 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 21098b9e07476deb3f40acd7e51cffbffb4ef865 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:36 +1000 Subject: powerpc: Move sparse_init() into initmem_init We did part of sparse initialisation in setup_arch and part in initmem_init. Put them together. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 91d9cfa..6e5310d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -690,7 +690,6 @@ void __init setup_arch(char **cmdline_p) emergency_stack_init(); initmem_init(); - sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e076d19..b7285a5 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -205,6 +205,7 @@ void __init initmem_init(void) /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); + sparse_init(); } /* mark pages that don't exist as nosave */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2e9ad2d7..417b0a5 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -966,6 +966,8 @@ void __init initmem_init(void) sparse_memory_present_with_active_regions(nid); } + sparse_init(); + setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); -- cgit v0.10.2 From 6e4c632cdff7bf0238a2543dfe98bd1ad40313c2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 22:15:37 +1000 Subject: powerpc: make __ffs return unsigned long I'm seeing a build warning in mm/nobootmem.c after removing bootmem: mm/nobootmem.c: In function '__free_pages_memory': include/linux/kernel.h:713:17: warning: comparison of distinct pointer types lacks a cast [enabled by default] (void) (&_min1 == &_min2); \ ^ mm/nobootmem.c:90:11: note: in expansion of macro 'min' order = min(MAX_ORDER - 1UL, __ffs(start)); ^ The rest of the worlds seems to define __ffs as returning unsigned long, so lets do that. Signed-off-by: Anton Blanchard Tested-by: Emil Medve Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index bd3bd57..c633f058 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -213,7 +213,7 @@ static __inline__ unsigned long ffz(unsigned long x) return __ilog2(x & -x); } -static __inline__ int __ffs(unsigned long x) +static __inline__ unsigned long __ffs(unsigned long x) { return __ilog2(x & -x); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 65d2ed4..d8484d7 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -152,7 +152,7 @@ static int setup_one_atmu(struct ccsr_pci __iomem *pci, flags |= 0x10000000; /* enable relaxed ordering */ for (i = 0; size > 0; i++) { - unsigned int bits = min(ilog2(size), + unsigned int bits = min_t(u32, ilog2(size), __ffs(pci_addr | phys_addr)); if (index + i >= 5) -- cgit v0.10.2 From 7d56c65a6ff9065c459fc63c509950d8ea66e00c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 17:07:03 +1000 Subject: powerpc/ftrace: Remove mod_return_to_handler mod_return_to_handler is the same as return_to_handler, except it handles the change of the TOC (r2). Add this into return_to_handler and remove mod_return_to_handler. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0905c8d..66f7ed3 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1241,28 +1241,6 @@ _GLOBAL(ftrace_graph_caller) _GLOBAL(return_to_handler) /* need to save return values */ - std r4, -24(r1) - std r3, -16(r1) - std r31, -8(r1) - mr r31, r1 - stdu r1, -112(r1) - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - ld r1, 0(r1) - ld r4, -24(r1) - ld r3, -16(r1) - ld r31, -8(r1) - - /* Jump back to real return address */ - blr - -_GLOBAL(mod_return_to_handler) - /* need to save return values */ std r4, -32(r1) std r3, -24(r1) /* save TOC */ @@ -1272,7 +1250,7 @@ _GLOBAL(mod_return_to_handler) stdu r1, -112(r1) /* - * We are in a module using the module's TOC. + * We might be called from a module. * Switch to our TOC to run inside the core kernel. */ ld r2, PACATOC(r13) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 390311c..abf7921 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -510,10 +510,6 @@ int ftrace_disable_ftrace_graph_caller(void) } #endif /* CONFIG_DYNAMIC_FTRACE */ -#ifdef CONFIG_PPC64 -extern void mod_return_to_handler(void); -#endif - /* * Hook the return address and push it in the stack of return addrs * in current thread info. @@ -523,7 +519,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) unsigned long old; int faulted; struct ftrace_graph_ent trace; - unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) return; @@ -531,13 +527,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; -#ifdef CONFIG_PPC64 - /* non core kernel code needs to save and restore the TOC */ - if (REGION_ID(self_addr) != KERNEL_REGION_ID) - return_hooker = (unsigned long)&mod_return_to_handler; -#endif - - return_hooker = ppc_function_entry((void *)return_hooker); + return_hooker = ppc_function_entry(return_to_handler); /* * Protect against fault, even if it shouldn't diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8c2691e..f6b8215 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1527,13 +1527,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) int curr_frame = current->curr_ret_stack; extern void return_to_handler(void); unsigned long rth = (unsigned long)return_to_handler; - unsigned long mrth = -1; -#ifdef CONFIG_PPC64 - extern void mod_return_to_handler(void); - rth = *(unsigned long *)rth; - mrth = (unsigned long)mod_return_to_handler; - mrth = *(unsigned long *)mrth; -#endif #endif sp = (unsigned long) stack; @@ -1558,7 +1551,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) if (!firstframe || ip != lr) { printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if ((ip == rth || ip == mrth) && curr_frame >= 0) { + if ((ip == rth) && curr_frame >= 0) { printk(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; -- cgit v0.10.2 From b3c18725a0eb7ea2458e9ae3b7e5a477f52e361f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 17:07:04 +1000 Subject: powerpc/ftrace: simplify prepare_ftrace_return Instead of passing in the stack address of the link register to be modified, just pass in the old value and return the new value and rely on ftrace_graph_caller to do the modification. This removes the exception handling around the stack update - it isn't needed and we weren't consistent about it. Later on we would do an unprotected modification: if (!ftrace_graph_entry(&trace)) { *parent = old; Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 22b45a4..ad837d8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1424,12 +1424,18 @@ _GLOBAL(ftrace_graph_caller) lwz r4, 44(r1) subi r4, r4, MCOUNT_INSN_SIZE - /* get the parent address */ - addi r3, r1, 52 + /* Grab the LR out of the caller stack frame */ + lwz r3,52(r1) bl prepare_ftrace_return nop + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + stw r3,52(r1) + MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 66f7ed3..194e46d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1227,13 +1227,20 @@ _GLOBAL(ftrace_graph_caller) ld r4, 128(r1) subi r4, r4, MCOUNT_INSN_SIZE - /* get the parent address */ + /* Grab the LR out of the caller stack frame */ ld r11, 112(r1) - addi r3, r11, 16 + ld r3, 16(r11) bl prepare_ftrace_return nop + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + ld r11, 112(r1) + std r3, 16(r11) + ld r0, 128(r1) mtlr r0 addi r1, r1, 112 diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index abf7921..d795031 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -512,67 +512,34 @@ int ftrace_disable_ftrace_graph_caller(void) /* * Hook the return address and push it in the stack of return addrs - * in current thread info. + * in current thread info. Return the address we want to divert to. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - unsigned long old; - int faulted; struct ftrace_graph_ent trace; unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) - return; + goto out; if (unlikely(atomic_read(¤t->tracing_graph_pause))) - return; + goto out; return_hooker = ppc_function_entry(return_to_handler); - /* - * Protect against fault, even if it shouldn't - * happen. This tool is too much intrusive to - * ignore such a protection. - */ - asm volatile( - "1: " PPC_LL "%[old], 0(%[parent])\n" - "2: " PPC_STL "%[return_hooker], 0(%[parent])\n" - " li %[faulted], 0\n" - "3:\n" - - ".section .fixup, \"ax\"\n" - "4: li %[faulted], 1\n" - " b 3b\n" - ".previous\n" - - ".section __ex_table,\"a\"\n" - PPC_LONG_ALIGN "\n" - PPC_LONG "1b,4b\n" - PPC_LONG "2b,4b\n" - ".previous" - - : [old] "=&r" (old), [faulted] "=r" (faulted) - : [parent] "r" (parent), [return_hooker] "r" (return_hooker) - : "memory" - ); - - if (unlikely(faulted)) { - ftrace_graph_stop(); - WARN_ON(1); - return; - } - - trace.func = self_addr; + trace.func = ip; trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - *parent = old; - return; - } + if (!ftrace_graph_entry(&trace)) + goto out; + + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + goto out; - if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) - *parent = old; + parent = return_hooker; +out: + return parent; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v0.10.2 From dedd24a12fe6735898feeb06184ee346907abb5d Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sat, 25 May 2013 12:54:25 -0400 Subject: powerpc: Remove unused devm_ioremap_prot() Added in 2008, but has never had any in-tree users, and no other architectures provide it. Signed-off-by: Kyle McMartin Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 97d3869..5f0ad89 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -851,9 +851,6 @@ static inline void * bus_to_virt(unsigned long address) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) -void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, - size_t size, unsigned long flags); - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IO_H */ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9f342f1..597562f 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -12,7 +12,6 @@ CFLAGS_REMOVE_feature-fixups.o = -pg obj-y := string.o alloc.o \ crtsavres.o ppc_ksyms.o obj-$(CONFIG_PPC32) += div64.o copy_32.o -obj-$(CONFIG_HAS_IOMEM) += devres.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ usercopy_64.o mem_64.o string.o \ diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c deleted file mode 100644 index 8df55fc..0000000 --- a/arch/powerpc/lib/devres.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2008 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include /* devres_*(), devm_ioremap_release() */ -#include -#include /* ioremap_prot() */ -#include /* EXPORT_SYMBOL() */ - -/** - * devm_ioremap_prot - Managed ioremap_prot() - * @dev: Generic device to remap IO address for - * @offset: BUS offset to map - * @size: Size of map - * @flags: Page flags - * - * Managed ioremap_prot(). Map is automatically unmapped on driver - * detach. - */ -void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, - size_t size, unsigned long flags) -{ - void __iomem **ptr, *addr; - - ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return NULL; - - addr = ioremap_prot(offset, size, flags); - if (addr) { - *ptr = addr; - devres_add(dev, ptr); - } else - devres_free(ptr); - - return addr; -} -EXPORT_SYMBOL(devm_ioremap_prot); -- cgit v0.10.2 From 60878dfb11d47a9029581543ee2d2101faa88072 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 29 Apr 2014 09:24:06 +0200 Subject: powerpc/ftrace: Fix obsolete comment CONFIG_MCOUNT is not defined anymore, the corresponding #ifdef there is CONFIG_FUNCTION_TRACER. Signed-off-by: Jiri Slaby Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ad837d8..10a0935 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1463,4 +1463,4 @@ _GLOBAL(return_to_handler) blr #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#endif /* CONFIG_MCOUNT */ +#endif /* CONFIG_FUNCTION_TRACER */ -- cgit v0.10.2 From 66f3d4fe0ba8f38df6141cc0b6ec145be6e63f0e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 23 Oct 2014 16:35:14 +1100 Subject: powerpc: Remove CPU_FTR_HVMODE from CPU_FTRS_ALWAYS We potentially clear CPU_FTR_HVMODE at runtime in __init_hvmode_206(), so we must make sure it's not set in CPU_FTRS_ALWAYS. This doesn't hurt us in practice at the moment, because we don't support compiling only for CPUs that support CPU_FTR_HVMODE. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index daa5af9..42d8b33 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -511,7 +511,7 @@ enum { (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ - CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE) + CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE) #endif #else enum { -- cgit v0.10.2 From 90029640fd5963343fb862d419db161bc0424120 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Aug 2014 18:26:28 +1000 Subject: powerpc: Remove unused CPU_FTRS_A2 In commit fb5a515704d7 "Remove platforms/wsp and associated pieces" we removed the last user of CPU_FTRS_A2, so we should remove it too. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 42d8b33..22d5a7d 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -448,13 +448,9 @@ extern const char *powerpc_base_platform; CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) #define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) -#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \ - CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | \ - CPU_FTR_ICSWX | CPU_FTR_DABRX ) - #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2) +#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ @@ -505,7 +501,7 @@ enum { #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2) +#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500) #else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ -- cgit v0.10.2 From 4b4b13d5fec8a82ed2780c487e49cfc4321a8c14 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Tue, 28 Oct 2014 12:19:00 +0100 Subject: powerpc/boot: Parse chosen/cmdline-timeout parameter On some platforms a 5 second timeout during boot might be quite long, so make it configurable. Run the loop at least once to let the user stop the boot by holding a key pressed. If the timeout is set to 0, don't wait for input, which can be used as a workaround if the boot hangs on random data coming in on the serial port. Signed-off-by: Simon Kagstrom [mpe: Changelog wording & whitespace] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index d367a0a..d80161b 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -144,13 +144,24 @@ static char cmdline[BOOT_COMMAND_LINE_SIZE] static void prep_cmdline(void *chosen) { + unsigned int getline_timeout = 5000; + int v; + int n; + + /* Wait-for-input time */ + n = getprop(chosen, "linux,cmdline-timeout", &v, sizeof(v)); + if (n == sizeof(v)) + getline_timeout = v; + if (cmdline[0] == '\0') getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1); printf("\n\rLinux/PowerPC load: %s", cmdline); + /* If possible, edit the command line */ - if (console_ops.edit_cmdline) - console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE); + if (console_ops.edit_cmdline && getline_timeout) + console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE, getline_timeout); + printf("\n\r"); /* Put the command line back into the devtree for the kernel */ diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index 8aad3c5..5e75e1c 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -58,7 +58,7 @@ extern struct dt_ops dt_ops; struct console_ops { int (*open)(void); void (*write)(const char *buf, int len); - void (*edit_cmdline)(char *buf, int len); + void (*edit_cmdline)(char *buf, int len, unsigned int getline_timeout); void (*close)(void); void *data; }; diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index f2156f0..167ee94 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -33,7 +33,7 @@ static void serial_write(const char *buf, int len) scdp->putc(*buf++); } -static void serial_edit_cmdline(char *buf, int len) +static void serial_edit_cmdline(char *buf, int len, unsigned int timeout) { int timer = 0, count; char ch, *cp; @@ -44,7 +44,7 @@ static void serial_edit_cmdline(char *buf, int len) cp = &buf[count]; count++; - while (timer++ < 5*1000) { + do { if (scdp->tstc()) { while (((ch = scdp->getc()) != '\n') && (ch != '\r')) { /* Test for backspace/delete */ @@ -70,7 +70,7 @@ static void serial_edit_cmdline(char *buf, int len) break; /* Exit 'timer' loop */ } udelay(1000); /* 1 msec */ - } + } while (timer++ < timeout); *cp = 0; } -- cgit v0.10.2 From 1bc9e47aa8e4b0f9629e218d4c7667308ca6af67 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 30 Oct 2014 15:43:43 +1100 Subject: powerpc/jump_label: Use HAVE_JUMP_LABEL Commit d4fe0965e208 ("powerpc/jump_label: use HAVE_JUMP_LABEL?") missed a few conversions. Change the remaining uses of CONFIG_JUMP_LABEL to HAVE_JUMP_LABEL. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index feb549a..cf7266d 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -18,7 +18,7 @@ .section ".text" #ifdef CONFIG_TRACEPOINTS -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL #define OPAL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) #else diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 3fda3f1..ccd53f9 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -18,7 +18,7 @@ #ifdef CONFIG_TRACEPOINTS -#ifndef CONFIG_JUMP_LABEL +#ifndef HAVE_JUMP_LABEL .section ".toc","aw" .globl hcall_tracepoint_refcount @@ -78,7 +78,7 @@ hcall_tracepoint_refcount: mr r5,BUFREG; \ __HCALL_INST_POSTCALL -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL #define HCALL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) #else -- cgit v0.10.2 From e88f157de5e0b327b1b26e3260de4b16c07e2d19 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 27 Oct 2014 20:56:14 +1100 Subject: powerpc: Remove unused vgacon_remap_base & fix build break The build is broken with CONFIG_PPC32=y, CONFIG_FB_VGA16=y and CONFIG_VGA_CONSOLE=n. The problem is that vgacon_remap_base is not defined. It's used in: #define VGA_MAP_MEM(x,s) (x + vgacon_remap_base) Which is used in the vga16fb.c code. Digging down it seems vgacon_remap_base is never initialised. It used to be, back in arch/ppc (pplus.c and prep_setup.c), but none of that code ever made it to arch/powerpc. So given it's been unused for >6 years, remove it. Whether vga16fb.c works on 32-bit is another question, but this patch shouldn't affect it. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h index a2eac40..e5f8dd3 100644 --- a/arch/powerpc/include/asm/vga.h +++ b/arch/powerpc/include/asm/vga.h @@ -38,12 +38,10 @@ static inline u16 scr_readw(volatile const u16 *addr) #endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */ -extern unsigned long vgacon_remap_base; - #ifdef __powerpc64__ #define VGA_MAP_MEM(x,s) ((unsigned long) ioremap((x), s)) #else -#define VGA_MAP_MEM(x,s) (x + vgacon_remap_base) +#define VGA_MAP_MEM(x,s) (x) #endif #define vga_readb(x) (*(x)) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ba3a3c3..bb02e9f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -52,11 +52,6 @@ unsigned long ISA_DMA_THRESHOLD; unsigned int DMA_MODE_READ; unsigned int DMA_MODE_WRITE; -#ifdef CONFIG_VGA_CONSOLE -unsigned long vgacon_remap_base; -EXPORT_SYMBOL(vgacon_remap_base); -#endif - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. -- cgit v0.10.2 From a3e5b356b3ab40b00ecb0f6fde445db719c100b5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Oct 2014 14:47:25 +1100 Subject: powerpc: Don't use local named register variable in current_thread_info LLVM doesn't support local named register variables and is unlikely to. current_thread_info is using one, fix it by moving it out and calling it __current_r1(). I gave it a bit of an obscure name because we don't want anyone else using it - they should use current_stack_pointer(). This specific case is performance critical and we can't afford to call a function to get it. Furthermore it isn't important to know exactly where in the stack we are since we mask the lower bits. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index b034ecd..ebc4f16 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -71,13 +71,12 @@ struct thread_info { #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) /* how to get the thread information struct from C */ +register unsigned long __current_r1 asm("r1"); static inline struct thread_info *current_thread_info(void) { - register unsigned long sp asm("r1"); - /* gcc4, at least, is smart enough to turn this into a single * rlwinm for ppc32 and clrrdi for ppc64 */ - return (struct thread_info *)(sp & ~(THREAD_SIZE-1)); + return (struct thread_info *)(__current_r1 & ~(THREAD_SIZE-1)); } #endif /* __ASSEMBLY__ */ -- cgit v0.10.2 From 6f791bef76d66923bc250cbbf4ddbf3f1c944686 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Oct 2014 14:47:26 +1100 Subject: powerpc: Remove double braces in alignment code. Looks like I introduced this when adding LE support. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 34f5552..86150fb 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -908,7 +908,7 @@ int fix_alignment(struct pt_regs *regs) flush_fp_to_thread(current); } - if ((nb == 16)) { + if (nb == 16) { if (flags & F) { /* Special case for 16-byte FP loads and stores */ PPC_WARN_ALIGNMENT(fp_pair, regs); -- cgit v0.10.2 From ecaf5fa0a5096a237482ddc4b3c46becf531db1b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Oct 2014 14:47:27 +1100 Subject: powerpc: LLVM complains about forward declaration of struct rtas_sensors Move the declaration up to silence the warning. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 8777fb0..fb2fb3e 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -113,17 +113,6 @@ #define SENSOR_PREFIX "ibm,sensor-" #define cel_to_fahr(x) ((x*9/5)+32) - -/* Globals */ -static struct rtas_sensors sensors; -static struct device_node *rtas_node = NULL; -static unsigned long power_on_time = 0; /* Save the time the user set */ -static char progress_led[MAX_LINELENGTH]; - -static unsigned long rtas_tone_frequency = 1000; -static unsigned long rtas_tone_volume = 0; - -/* ****************STRUCTS******************************************* */ struct individual_sensor { unsigned int token; unsigned int quant; @@ -134,6 +123,15 @@ struct rtas_sensors { unsigned int quant; }; +/* Globals */ +static struct rtas_sensors sensors; +static struct device_node *rtas_node = NULL; +static unsigned long power_on_time = 0; /* Save the time the user set */ +static char progress_led[MAX_LINELENGTH]; + +static unsigned long rtas_tone_frequency = 1000; +static unsigned long rtas_tone_volume = 0; + /* ****************************************************************** */ /* Declarations */ static int ppc_rtas_sensors_show(struct seq_file *m, void *v); -- cgit v0.10.2 From 7aa189c8f57f2141b8655c2a13c7486d0844d490 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 3 Nov 2014 08:18:06 +1100 Subject: powerpc/pseries: Quieten ibm,pcie-link-speed-stats warning The ibm,pcie-link-speed-stats isn't mandatory, so we shouldn't print a high priority error message when missing. One example where we see this is QEMU. Reduce it to pr_debug. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 67e4859..fe16a50 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -134,7 +134,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) of_node_put(pdn); if (rc) { - pr_err("no ibm,pcie-link-speed-stats property\n"); + pr_debug("no ibm,pcie-link-speed-stats property\n"); return 0; } -- cgit v0.10.2 From 20f1aae6cb0120fa54c0fe81614f6983df3b420a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 3 Nov 2014 08:34:01 +1100 Subject: powerpc/pseries: Quieten relocation on exceptions warning The H_SET_MODE hcall returns H_P2 if a function is not implemented and all callers should handle this case. The call to enable relocation on exceptions currently prints an error message if the feature is not implemented. While H_SET_MODE was first introduced on POWER8 (which has relocation on exceptions), it has been now added on some POWER7 configurations (which does not). Check for H_P2 and print an informational message instead. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index db0fc0c..8b8fb19 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -499,7 +499,11 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_SET_MODE)) { long rc; - if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) { + + rc = pSeries_enable_reloc_on_exc(); + if (rc == H_P2) { + pr_info("Relocation on exceptions not supported\n"); + } else if (rc != H_SUCCESS) { pr_warn("Unable to enable relocation on exceptions: " "%ld\n", rc); } -- cgit v0.10.2 From 0f9da5cb746581ae2de120c78a8afe29fb594fe4 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:44 +0200 Subject: powerpc/4xx/cpm: delete unneeded test before of_node_put Simplify the error path to avoid calling of_node_put when it is not needed. Signed-off-by: Julia Lawall Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/sysdev/ppc4xx_cpm.c index 82e2cfe..ba95adf 100644 --- a/arch/powerpc/sysdev/ppc4xx_cpm.c +++ b/arch/powerpc/sysdev/ppc4xx_cpm.c @@ -281,7 +281,7 @@ static int __init cpm_init(void) printk(KERN_ERR "cpm: could not parse dcr property for %s\n", np->full_name); ret = -EINVAL; - goto out; + goto node_put; } cpm.dcr_host = dcr_map(np, dcr_base, dcr_len); @@ -290,7 +290,7 @@ static int __init cpm_init(void) printk(KERN_ERR "cpm: failed to map dcr property for %s\n", np->full_name); ret = -EINVAL; - goto out; + goto node_put; } /* All 4xx SoCs with a CPM controller have one of two @@ -330,9 +330,9 @@ static int __init cpm_init(void) if (cpm.standby || cpm.suspend) suspend_set_ops(&cpm_suspend_ops); +node_put: + of_node_put(np); out: - if (np) - of_node_put(np); return ret; } -- cgit v0.10.2 From 4cdd641dddd0ecd93f7b84fc3622df83937f6094 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:45 +0200 Subject: powerpc/fsl: fsl_soc: delete unneeded test before of_node_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of_node_put supports NULL as its argument, so the initial test is not necessary. Suggested by Uwe Kleine-König. The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e; @@ -if (e) of_node_put(e); // Signed-off-by: Julia Lawall Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 1e04568..99269c0 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -197,8 +197,7 @@ static int __init setup_rstcr(void) if (!rstcr && ppc_md.restart == fsl_rstcr_restart) printk(KERN_ERR "No RSTCR register, warm reboot won't work\n"); - if (np) - of_node_put(np); + of_node_put(np); return 0; } -- cgit v0.10.2 From 756d36790ad0fa221af650095fa52764474854b7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:46 +0200 Subject: powerpc/mpc5xxx: delete unneeded test before of_node_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of_node_put supports NULL as its argument, so the initial test is not necessary. Suggested by Uwe Kleine-König. The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e; @@ -if (e) of_node_put(e); // Signed-off-by: Julia Lawall Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c index 5492dc5..f4f0301 100644 --- a/arch/powerpc/sysdev/mpc5xxx_clocks.c +++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c @@ -26,8 +26,7 @@ unsigned long mpc5xxx_get_bus_frequency(struct device_node *node) of_node_put(node); node = np; } - if (node) - of_node_put(node); + of_node_put(node); return p_bus_freq ? *p_bus_freq : 0; } -- cgit v0.10.2 From 498b65147247f61f3f7f4deb35484ff98556e798 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:47 +0200 Subject: powerpc/pseries: delete unneeded test before of_node_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of_node_put supports NULL as its argument, so the initial test is not necessary. Suggested by Uwe Kleine-König. The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e; @@ -if (e) of_node_put(e); // Signed-off-by: Julia Lawall Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 49bcf7c..1cc3db9 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -574,8 +574,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) while (isa_dn && isa_dn != dn) isa_dn = isa_dn->parent; - if (isa_dn_orig) - of_node_put(isa_dn_orig); + of_node_put(isa_dn_orig); /* Count number of direct PCI children of the PHB. */ for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) -- cgit v0.10.2 From 6609ed14de75bde7a99b33e9be9f1873da91f07d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:48 +0200 Subject: powerpc/gamecube/wii: delete unneeded test before of_node_put Simplify the error path to avoid calling of_node_put when it is not needed. Signed-off-by: Julia Lawall Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c index 20a8ed9..7feb325 100644 --- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -247,7 +247,7 @@ void __init ug_udbg_init(void) np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-exi"); if (!np) { udbg_printf("%s: EXI node not found\n", __func__); - goto done; + goto out; } exi_io_base = ug_udbg_setup_exi_io_base(np); @@ -267,8 +267,8 @@ void __init ug_udbg_init(void) } done: - if (np) - of_node_put(np); + of_node_put(np); +out: return; } -- cgit v0.10.2 From cd32e2dcc9de6c27ecbbfc0e2079fb64b42bad5f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Nov 2014 09:12:28 +1100 Subject: powerpc: Fix bad NULL pointer check in udbg_uart_getc_poll() We have some code in udbg_uart_getc_poll() that tries to protect against a NULL udbg_uart_in, but gets it all wrong. Found with the LLVM static analyzer (scan-build). Fixes: 309257484cc1 ("powerpc: Cleanup udbg_16550 and add support for LPC PIO-only UARTs") Signed-off-by: Anton Blanchard [mpe: Add some newlines for readability while we're here] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 6e7c492..411116c 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -69,8 +69,12 @@ static void udbg_uart_putc(char c) static int udbg_uart_getc_poll(void) { - if (!udbg_uart_in || !(udbg_uart_in(UART_LSR) & LSR_DR)) + if (!udbg_uart_in) + return -1; + + if (!(udbg_uart_in(UART_LSR) & LSR_DR)) return udbg_uart_in(UART_RBR); + return -1; } -- cgit v0.10.2 From d1d5304fcbc519f5fb12cbcca9c4fdd1d94447d7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:29:28 +1100 Subject: powerpc/mm: Use PAGE_FACTOR PAGE_FACTOR was defined to reflect the difference between configured page size and fixed 4KB page size. Replace (PAGE_SHIFT - HW_PAGE_SHIFT) with PAGE_FACTOR. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 057cbbb..5094f32 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -514,7 +514,7 @@ htab_insert_pte: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- htab_special_pfn - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 htab_special_pfn: sldi r5,r5,HW_PAGE_SHIFT @@ -544,7 +544,7 @@ htab_call_hpte_insert1: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- 3f - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 3: sldi r5,r5,HW_PAGE_SHIFT -- cgit v0.10.2 From 8b91a2554610ac5e341de8fb0b5715fe90a0f2e2 Mon Sep 17 00:00:00 2001 From: "Suresh E. Warrier" Date: Mon, 3 Nov 2014 15:46:42 +1100 Subject: powerpc: Save/restore PPR for KVM hypercalls The system call FLIH (first-level interrupt handler) at 0xc00 unconditionally sets hardware priority to medium. For hypercalls, this means we lose guest OS priority. The front end (do_kvm_0x**) to the KVM interrupt handler always assumes that PPR priority is saved in PACA exception save area, so it copies this to the kvm_hstate structure. For hypercalls, this would be the saved priority from any previous exception. Eventually, the guest gets resumed with an incorrect priority. The fix is to save the PPR priority in PACA exception save area before switching HMT priorities in the FLIH so that existing code described above in the KVM interrupt handler can copy it from there into the VCPU's saved context. Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras [mpe: Dropped HMT_MEDIUM_PPR_DISCARD and reworded comment] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 72e783e..a1d45c1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -292,15 +292,26 @@ decrementer_pSeries: . = 0xc00 .globl system_call_pSeries system_call_pSeries: - HMT_MEDIUM + /* + * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems + * that support it) before changing to HMT_MEDIUM. That allows the KVM + * code to save that value into the guest state (it is the guest's PPR + * value). Otherwise just change to HMT_MEDIUM as userspace has + * already saved the PPR. + */ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER SET_SCRATCH0(r13) GET_PACA(r13) std r9,PACA_EXGEN+EX_R9(r13) + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); + HMT_MEDIUM; std r10,PACA_EXGEN+EX_R10(r13) + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); mfcr r9 KVMTEST(0xc00) GET_SCRATCH0(r13) +#else + HMT_MEDIUM; #endif SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_RFID -- cgit v0.10.2 From 7048c846949ac1feb09d1b624ea0e8c351d92a7b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 3 Nov 2014 15:46:43 +1100 Subject: powerpc: Fix compilation of emulate_step() Commit be96f63375a1 ("powerpc: Split out instruction analysis part of emulate_step()") added some calls to do_fp_load() and do_fp_store(), which fail to compile on configs with CONFIG_PPC_FPU=n and CONFIG_PPC_EMULATE_SSTEP=y. This fixes the compile by adding #ifdef CONFIG_PPC_FPU around the code that calls these functions. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 54651fc..dc885b3 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1865,6 +1865,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) } goto ldst_done; +#ifdef CONFIG_PPC_FPU case LOAD_FP: if (regs->msr & MSR_LE) return 0; @@ -1873,7 +1874,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) else err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); goto ldst_done; - +#endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: if (regs->msr & MSR_LE) @@ -1919,6 +1920,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) err = write_mem(op.val, op.ea, size, regs); goto ldst_done; +#ifdef CONFIG_PPC_FPU case STORE_FP: if (regs->msr & MSR_LE) return 0; @@ -1927,7 +1929,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) else err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); goto ldst_done; - +#endif #ifdef CONFIG_ALTIVEC case STORE_VMX: if (regs->msr & MSR_LE) -- cgit v0.10.2 From 608b286d1ddf38a7ceb624d2b689af095816d91c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 6 Nov 2014 11:38:27 +0800 Subject: powerpc/powernv: Add OPAL IPMI interface Recent OPAL firmare adds a couple of functions to send and receive IPMI messages: https://github.com/open-power/skiboot/commit/b2a374da This change updates the token list and wrappers to suit, and adds the platform devices for any IPMI interfaces. Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9124b0e..5d073e5 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -154,6 +154,8 @@ struct opal_sg_list { #define OPAL_HANDLE_HMI 98 #define OPAL_REGISTER_DUMP_REGION 101 #define OPAL_UNREGISTER_DUMP_REGION 102 +#define OPAL_IPMI_SEND 107 +#define OPAL_IPMI_RECV 108 #ifndef __ASSEMBLY__ @@ -452,6 +454,17 @@ struct opal_msg { __be64 params[8]; }; +enum { + OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1, +}; + +struct opal_ipmi_msg { + uint8_t version; + uint8_t netfn; + uint8_t cmd; + uint8_t data[]; +}; + struct opal_machine_check_event { enum OpalMCE_Version version:8; /* 0x00 */ uint8_t in_use; /* 0x01 */ @@ -963,6 +976,10 @@ int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); +int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, + uint64_t msg_len); +int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, + uint64_t *msg_len); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index feb549a..4f4e4009 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -250,3 +250,5 @@ OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); +OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); +OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d019b08..0297702 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -644,6 +644,16 @@ static void __init opal_dump_region_init(void) pr_warn("DUMP: Failed to register kernel log buffer. " "rc = %d\n", rc); } + +static void opal_ipmi_init(struct device_node *opal_node) +{ + struct device_node *np; + + for_each_child_of_node(opal_node, np) + if (of_device_is_compatible(np, "ibm,opal-ipmi")) + of_platform_device_create(np, NULL, NULL); +} + static int __init opal_init(void) { struct device_node *np, *consoles; @@ -707,6 +717,8 @@ static int __init opal_init(void) opal_msglog_init(); } + opal_ipmi_init(opal_node); + return 0; } machine_subsys_initcall(powernv, opal_init); @@ -742,6 +754,8 @@ void opal_shutdown(void) /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_ipmi_send); +EXPORT_SYMBOL_GPL(opal_ipmi_recv); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, -- cgit v0.10.2 From e7a7a65ed2e90c41341aafef9458d0aa8143a558 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 11 Nov 2014 12:50:22 +0800 Subject: powerpc: Fix comment typos in arch/powerpc/include/asm/bitops.h In arch/powerpc/include/asm/bitops.h, the comments about bit numbers in large (> 1 word) bitmaps have two typos: - On ppc64 system, the LSB of the 4th word should be bit 192 rather than 196, because if it's bit 196, bit 192-195 will be missing in the bitmap. - On ppc32 system, the LSB of the second word should be bit 32 rather than 31, because bit 31 is already in the first word. This patch fixes these typos. Signed-off-by: Boqun Feng Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index c633f058..59abc62 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -14,9 +14,9 @@ * * The bitop functions are defined to work on unsigned longs, so for a * ppc64 system the bits end up numbered: - * |63..............0|127............64|191...........128|255...........196| + * |63..............0|127............64|191...........128|255...........192| * and on ppc32: - * |31.....0|63....31|95....64|127...96|159..128|191..160|223..192|255..224| + * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224| * * There are a few little-endian macros used mostly for filesystem * bitmaps, these work on similar bit arrays layouts, but -- cgit v0.10.2 From d8ee6f34fdd28ade6b7b3a9be175bc5c0c3cfb38 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 12 Nov 2014 16:54:54 +1100 Subject: powerpc/xmon: Fix build when 4xx=y and 44x=n dump_tlb_44x() is only defined when 44x=y, but the ifdef in xmon.c checks for 4xx, leading to a build failure: arch/powerpc/xmon/xmon.c:912:4: error: implicit declaration of function 'dump_tlb_44x' Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 506d256..ef18021 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -907,7 +907,7 @@ cmds(struct pt_regs *excp) case 'u': dump_segments(); break; -#elif defined(CONFIG_4xx) +#elif defined(CONFIG_44x) case 'u': dump_tlb_44x(); break; -- cgit v0.10.2 From 31ceb157f294843563330658c3ad6e3b5a1c4fe2 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 5 Nov 2014 09:18:51 -0600 Subject: dt/bindings: Introduce the FSL QorIQ DPAA BMan The Buffer Manager is part of the Data-Path Acceleration Architecture (DPAA). BMan supports hardware allocation and deallocation of buffers belonging to pools originally created by software with configurable depletion thresholds. This binding covers the CCSR space programming model Signed-off-by: Emil Medve Change-Id: I3ec479bfb3c91951e96902f091f5d7d2adbef3b2 Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/soc/fsl/bman.txt b/Documentation/devicetree/bindings/soc/fsl/bman.txt new file mode 100644 index 0000000..9f80bf8 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/bman.txt @@ -0,0 +1,125 @@ +QorIQ DPAA Buffer Manager Device Tree Bindings + +Copyright (C) 2008 - 2014 Freescale Semiconductor Inc. + +CONTENTS + + - BMan Node + - BMan Private Memory Node + - Example + +BMan Node + +The Buffer Manager is part of the Data-Path Acceleration Architecture (DPAA). +BMan supports hardware allocation and deallocation of buffers belonging to pools +originally created by software with configurable depletion thresholds. This +binding covers the CCSR space programming model + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,bman" + May include "fsl,-bman" + +- reg + Usage: Required + Value type: + Definition: Registers region within the CCSR address space + +The BMan revision information is located in the BMAN_IP_REV_1/2 registers which +are located at offsets 0xbf8 and 0xbfc + +- interrupts + Usage: Required + Value type: + Definition: Standard property. The error interrupt + +- fsl,liodn + Usage: See pamu.txt + Value type: + Definition: PAMU property used for static LIODN assignment + +- fsl,iommu-parent + Usage: See pamu.txt + Value type: + Definition: PAMU property used for dynamic LIODN assignment + + For additional details about the PAMU/LIODN binding(s) see pamu.txt + +Devices connected to a BMan instance via Direct Connect Portals (DCP) must link +to the respective BMan instance + +- fsl,bman + Usage: Required + Value type: + Description: List of phandle and DCP index pairs, to the BMan instance + to which this device is connected via the DCP + +BMan Private Memory Node + +BMan requires a contiguous range of physical memory used for the backing store +for BMan Free Buffer Proxy Records (FBPR). This memory is reserved/allocated as a +node under the /reserved-memory node + +The BMan FBPR memory node must be named "bman-fbpr" + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must inclide "fsl,bman-fbpr" + +The following constraints are relevant to the FBPR private memory: + - The size must be 2^(size + 1), with size = 11..33. That is 4 KiB to + 16 GiB + - The alignment must be a muliptle of the memory size + +The size of the FBPR must be chosen by observing the hardware features configured +via the Reset Configuration Word (RCW) and that are relevant to a specific board +(e.g. number of MAC(s) pinned-out, number of offline/host command FMan ports, +etc.). The size configured in the DT must reflect the hardware capabilities and +not the specific needs of an application + +For additional details about reserved memory regions see reserved-memory.txt + +EXAMPLE + +The example below shows a BMan FBPR dynamic allocation memory node + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + compatible = "fsl,bman-fbpr"; + alloc-ranges = <0 0 0xf 0xffffffff>; + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + }; + +The example below shows a (P4080) BMan CCSR-space node + + crypto@300000 { + ... + fsl,bman = <&bman, 2>; + ... + }; + + bman: bman@31a000 { + compatible = "fsl,bman"; + reg = <0x31a000 0x1000>; + interrupts = <16 2 1 2>; + fsl,liodn = <0x17>; + memory-region = <&bman_fbpr>; + }; + + fman@400000 { + ... + fsl,bman = <&bman, 0>; + ... + }; -- cgit v0.10.2 From 5f3af4008bfcb388a3193fd871e1b2e94ba4b09c Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 5 Nov 2014 09:18:52 -0600 Subject: dt/bindings: Introduce the FSL QorIQ DPAA BMan portal(s) Portals are memory mapped interfaces to BMan that allow low-latency, lock-less interaction by software running on processor cores, accelerators and network interfaces with the BMan Signed-off-by: Emil Medve Change-Id: I6d245ffc14ba3d0e91d403ac7c3b91b75a9e6a95 Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt b/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt new file mode 100644 index 0000000..2a00e14 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt @@ -0,0 +1,56 @@ +QorIQ DPAA Buffer Manager Portals Device Tree Binding + +Copyright (C) 2008 - 2014 Freescale Semiconductor Inc. + +CONTENTS + + - BMan Portal + - Example + +BMan Portal Node + +Portals are memory mapped interfaces to BMan that allow low-latency, lock-less +interaction by software running on processor cores, accelerators and network +interfaces with the BMan + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,bman-portal-" + May include "fsl,-bman-portal" or "fsl,bman-portal" + +- reg + Usage: Required + Value type: + Definition: Two regions. The first is the cache-enabled region of + the portal. The second is the cache-inhibited region of + the portal + +- interrupts + Usage: Required + Value type: + Definition: Standard property + +EXAMPLE + +The example below shows a (P4080) BMan portals container/bus node with two portals + + bman-portals@ff4000000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + ranges = <0 0xf 0xf4000000 0x200000>; + + bman-portal@0 { + compatible = "fsl,bman-portal-1.0.0", "fsl,bman-portal"; + reg = <0x0 0x4000>, <0x100000 0x1000>; + interrupts = <105 2 0 0>; + }; + bman-portal@4000 { + compatible = "fsl,bman-portal-1.0.0", "fsl,bman-portal"; + reg = <0x4000 0x4000>, <0x101000 0x1000>; + interrupts = <107 2 0 0>; + }; + }; -- cgit v0.10.2 From 76a4f03f3ec7221112c20d053a1233540a601473 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 5 Nov 2014 09:18:53 -0600 Subject: dt/bindings: Introduce the FSL QorIQ DPAA QMan The Queue Manager is part of the Data-Path Acceleration Architecture (DPAA). QMan supports queuing and QoS scheduling of frames to CPUs, network interfaces and DPAA logic modules, maintains packet ordering within flows. Besides providing flow-level queuing, is also responsible for congestion management functions such as RED/WRED, congestion notifications and tail discards. This binding covers the CCSR space programming model Signed-off-by: Emil Medve Change-Id: I3acb223893e42003d6c9dc061db568ec0b10d29b Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/soc/fsl/qman.txt b/Documentation/devicetree/bindings/soc/fsl/qman.txt new file mode 100644 index 0000000..063e3a0 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/qman.txt @@ -0,0 +1,165 @@ +QorIQ DPAA Queue Manager Device Tree Binding + +Copyright (C) 2008 - 2014 Freescale Semiconductor Inc. + +CONTENTS + + - QMan Node + - QMan Private Memory Nodes + - Example + +QMan Node + +The Queue Manager is part of the Data-Path Acceleration Architecture (DPAA). QMan +supports queuing and QoS scheduling of frames to CPUs, network interfaces and +DPAA logic modules, maintains packet ordering within flows. Besides providing +flow-level queuing, is also responsible for congestion management functions such +as RED/WRED, congestion notifications and tail discards. This binding covers the +CCSR space programming model + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,qman" + May include "fsl,-qman" + +- reg + Usage: Required + Value type: + Definition: Registers region within the CCSR address space + +The QMan revision information is located in the QMAN_IP_REV_1/2 registers which +are located at offsets 0xbf8 and 0xbfc + +- interrupts + Usage: Required + Value type: + Definition: Standard property. The error interrupt + +- fsl,liodn + Usage: See pamu.txt + Value type: + Definition: PAMU property used for static LIODN assignment + +- fsl,iommu-parent + Usage: See pamu.txt + Value type: + Definition: PAMU property used for dynamic LIODN assignment + + For additional details about the PAMU/LIODN binding(s) see pamu.txt + +- clocks + Usage: See clock-bindings.txt and qoriq-clock.txt + Value type: + Definition: Reference input clock. Its frequency is half of the + platform clock + +Devices connected to a QMan instance via Direct Connect Portals (DCP) must link +to the respective QMan instance + +- fsl,qman + Usage: Required + Value type: + Description: List of phandle and DCP index pairs, to the QMan instance + to which this device is connected via the DCP + +QMan Private Memory Nodes + +QMan requires two contiguous range of physical memory used for the backing store +for QMan Frame Queue Descriptor (FQD) and Packed Frame Descriptor Record (PFDR). +This memory is reserved/allocated as a nodes under the /reserved-memory node + +The QMan FQD memory node must be named "qman-fqd" + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must inclide "fsl,qman-fqd" + +The QMan PFDR memory node must be named "qman-pfdr" + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must inclide "fsl,qman-pfdr" + +The following constraints are relevant to the FQD and PFDR private memory: + - The size must be 2^(size + 1), with size = 11..29. That is 4 KiB to + 1 GiB + - The alignment must be a muliptle of the memory size + +The size of the FQD and PFDP must be chosen by observing the hardware features +configured via the Reset Configuration Word (RCW) and that are relevant to a +specific board (e.g. number of MAC(s) pinned-out, number of offline/host command +FMan ports, etc.). The size configured in the DT must reflect the hardware +capabilities and not the specific needs of an application + +For additional details about reserved memory regions see reserved-memory.txt + +EXAMPLE + +The example below shows a QMan FQD and a PFDR dynamic allocation memory nodes + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + qman_fqd: qman-fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0xf 0xffffffff>; + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0xf 0xffffffff>; + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + +The example below shows a (P4080) QMan CCSR-space node + + clockgen: global-utilities@e1000 { + ... + sysclk: sysclk { + ... + }; + ... + platform_pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; + ... + }; + + crypto@300000 { + ... + fsl,qman = <&qman, 2>; + ... + }; + + qman: qman@318000 { + compatible = "fsl,qman"; + reg = <0x318000 0x1000>; + interrupts = <16 2 1 3> + fsl,liodn = <0x16>; + memory-region = <&qman_fqd &qman_pfdr>; + clocks = <&platform_pll 1>; + }; + + fman@400000 { + ... + fsl,qman = <&qman, 0>; + ... + }; -- cgit v0.10.2 From f3f6743d1b719ba53aa69493bf76b76a8871bbfa Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 5 Nov 2014 09:18:54 -0600 Subject: dt/bindings: Introduce the FSL QorIQ DPAA QMan portal(s) Portals are memory mapped interfaces to QMan that allow low-latency, lock-less interaction by software running on processor cores, accelerators and network interfaces with the QMan Signed-off-by: Emil Medve Change-Id: I29764fa8093b5ce65460abc879446795c50d7185 Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt b/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt new file mode 100644 index 0000000..48c4dae --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt @@ -0,0 +1,154 @@ +QorIQ DPAA Queue Manager Portals Device Tree Binding + +Copyright (C) 2008 - 2014 Freescale Semiconductor Inc. + +CONTENTS + + - QMan Portal + - QMan Pool Channel + - Example + +QMan Portal Node + +Portals are memory mapped interfaces to QMan that allow low-latency, lock-less +interaction by software running on processor cores, accelerators and network +interfaces with the QMan + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,qman-portal-" + May include "fsl,-qman-portal" or "fsl,qman-portal" + +- reg + Usage: Required + Value type: + Definition: Two regions. The first is the cache-enabled region of + the portal. The second is the cache-inhibited region of + the portal + +- interrupts + Usage: Required + Value type: + Definition: Standard property + +- fsl,liodn + Usage: See pamu.txt + Value type: + Definition: Two LIODN(s). DQRR LIODN (DLIODN) and Frame LIODN + (FLIODN) + +- fsl,iommu-parent + Usage: See pamu.txt + Value type: + Definition: PAMU property used for dynamic LIODN assignment + + For additional details about the PAMU/LIODN binding(s) see pamu.txt + +- fsl,qman-channel-id + Usage: Required + Value type: + Definition: The hardware index of the channel. This can also be + determined by dividing any of the channel's 8 work queue + IDs by 8 + +In addition to these properties the qman-portals should have sub-nodes to +represent the HW devices/portals that are connected to the software portal +described here + +The currently supported sub-nodes are: + * fman0 + * fman1 + * pme + * crypto + +These subnodes should have the following properties: + +- fsl,liodn + Usage: See pamu.txt + Value type: + Definition: PAMU property used for static LIODN assignment + +- fsl,iommu-parent + Usage: See pamu.txt + Value type: + Definition: PAMU property used for dynamic LIODN assignment + +- dev-handle + Usage: Required + Value type: + Definition: The phandle to the particular hardware device that this + portal is connected to. + +DPAA QMan Pool Channel Nodes + +Pool Channels are defined with the following properties. + +PROPERTIES + +- compatible + Usage: Required + Value type: + Definition: Must include "fsl,qman-pool-channel" + May include "fsl,-qman-pool-channel" + +- fsl,qman-channel-id + Usage: Required + Value type: + Definition: The hardware index of the channel. This can also be + determined by dividing any of the channel's 8 work queue + IDs by 8 + +EXAMPLE + +The example below shows a (P4080) QMan portals container/bus node with two portals + + qman-portals@ff4200000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + ranges = <0 0xf 0xf4200000 0x200000>; + + qman-portal@0 { + compatible = "fsl,qman-portal-1.2.0", "fsl,qman-portal"; + reg = <0 0x4000>, <0x100000 0x1000>; + interrupts = <104 2 0 0>; + fsl,liodn = <1 2>; + fsl,qman-channel-id = <0>; + + fman0 { + fsl,liodn = <0x21>; + dev-handle = <&fman0>; + }; + fman1 { + fsl,liodn = <0xa1>; + dev-handle = <&fman1>; + }; + crypto { + fsl,liodn = <0x41 0x66>; + dev-handle = <&crypto>; + }; + }; + qman-portal@4000 { + compatible = "fsl,qman-portal-1.2.0", "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x101000 0x1000>; + interrupts = <106 2 0 0>; + fsl,liodn = <3 4>; + fsl,qman-channel-id = <1>; + + fman0 { + fsl,liodn = <0x22>; + dev-handle = <&fman0>; + }; + fman1 { + fsl,liodn = <0xa2>; + dev-handle = <&fman1>; + }; + crypto { + fsl,liodn = <0x42 0x67>; + dev-handle = <&crypto>; + }; + }; + }; -- cgit v0.10.2 From 297d35fd2a7d3fbd4e5c0f0c1c18213117ba11ba Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Wed, 17 Sep 2014 14:08:30 +0300 Subject: powerpc/fsl: Frame Manager Device Tree binding document The Frame Manager (FMan) combines the Ethernet network interfaces with packet distribution logic to provide intelligent distribution and queuing decisions for incoming traffic at line rate. This binding document describes Freescale's Frame Manager hardware attributes that are used by the Frame Manager driver for its basic initialization and configuration. Signed-off-by: Igal Liberman Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt new file mode 100644 index 0000000..da8e5f2 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt @@ -0,0 +1,529 @@ +============================================================================= +Freescale Frame Manager Device Bindings + +CONTENTS + - FMan Node + - FMan Port Node + - FMan MURAM Node + - FMan dTSEC/XGEC/mEMAC Node + - FMan IEEE 1588 Node + - Example + +============================================================================= +FMan Node + +DESCRIPTION + +Due to the fact that the FMan is an aggregation of sub-engines (ports, MACs, +etc.) the FMan node will have child nodes for each of them. + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must include "fsl,fman" + FMan version can be determined via FM_IP_REV_1 register in the + FMan block. The offset is 0xc4 from the beginning of the + Frame Processing Manager memory map (0xc3000 from the + beginning of the FMan node). + +- cell-index + Usage: required + Value type: + Definition: Specifies the index of the FMan unit. + + The cell-index value may be used by the SoC, to identify the + FMan unit in the SoC memory map. In the table bellow, + there's a description of the cell-index use in each SoC: + + - P1023: + register[bit] FMan unit cell-index + ============================================================ + DEVDISR[1] 1 0 + + - P2041, P3041, P4080 P5020, P5040: + register[bit] FMan unit cell-index + ============================================================ + DCFG_DEVDISR2[6] 1 0 + DCFG_DEVDISR2[14] 2 1 + (Second FM available only in P4080 and P5040) + + - B4860, T1040, T2080, T4240: + register[bit] FMan unit cell-index + ============================================================ + DCFG_CCSR_DEVDISR2[24] 1 0 + DCFG_CCSR_DEVDISR2[25] 2 1 + (Second FM available only in T4240) + + DEVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in + the specific SoC "Device Configuration/Pin Control" Memory + Map. + +- reg + Usage: required + Value type: + Definition: A standard property. Specifies the offset of the + following configuration registers: + - BMI configuration registers. + - QMI configuration registers. + - DMA configuration registers. + - FPM configuration registers. + - FMan controller configuration registers. + +- ranges + Usage: required + Value type: + Definition: A standard property. + +- clocks + Usage: required + Value type: + Definition: phandle for fman clock. + +- clock-names + usage: optional + Value type: + Definition: A standard property + +- interrupts + Usage: required + Value type: + Definition: A pair of IRQs are specified in this property. + The first element is associated with the event interrupts and + the second element is associated with the error interrupts. + +- fsl,qman-channel-range + Usage: required + Value type: + Definition: Specifies the range of the available dedicated + channels in the FMan. The first cell specifies the beginning + of the range and the second cell specifies the number of + channels. + Further information available at: + "Work Queue (WQ) Channel Assignments in the QMan" section + in DPAA Reference Manual. + +============================================================================= +FMan MURAM Node + +DESCRIPTION + +FMan Internal memory - shared between all the FMan modules. +It contains data structures that are common and written to or read by +the modules. +FMan internal memory is split into the following parts: + Packet buffering (Tx/Rx FIFOs) + Frames internal context + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: Must include "fsl,fman-muram" + +- ranges + Usage: required + Value type: + Definition: A standard property. + Specifies the multi-user memory offset and the size within + the FMan. + +EXAMPLE + +muram@0 { + compatible = "fsl,fman-muram"; + ranges = <0 0x000000 0x28000>; +}; + +============================================================================= +FMan Port Node + +DESCRIPTION + +The Frame Manager (FMan) supports several types of hardware ports: + Ethernet receiver (RX) + Ethernet transmitter (TX) + Offline/Host command (O/H) + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: A standard property. + Must include one of the following: + - "fsl,fman-v2-port-oh" for FManV2 OH ports + - "fsl,fman-v2-port-rx" for FManV2 RX ports + - "fsl,fman-v2-port-tx" for FManV2 TX ports + - "fsl,fman-v3-port-oh" for FManV3 OH ports + - "fsl,fman-v3-port-rx" for FManV3 RX ports + - "fsl,fman-v3-port-tx" for FManV3 TX ports + +- cell-index + Usage: required + Value type: + Definition: Specifies the hardware port id. + Each hardware port on the FMan has its own hardware PortID. + Super set of all hardware Port IDs available at FMan Reference + Manual under "FMan Hardware Ports in Freescale Devices" table. + + Each hardware port is assigned a 4KB, port-specific page in + the FMan hardware port memory region (which is part of the + FMan memory map). The first 4 KB in the FMan hardware ports + memory region is used for what are called common registers. + The subsequent 63 4KB pages are allocated to the hardware + ports. + The page of a specific port is determined by the cell-index. + +- reg + Usage: required + Value type: + Definition: There is one reg region describing the port + configuration registers. + +EXAMPLE + +port@a8000 { + cell-index = <0x28>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xa8000 0x1000>; +}; + +port@88000 { + cell-index = <0x8>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x88000 0x1000>; +}; + +port@81000 { + cell-index = <0x1>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x81000 0x1000>; +}; + +============================================================================= +FMan dTSEC/XGEC/mEMAC Node + +DESCRIPTION + +mEMAC/dTSEC/XGEC are the Ethernet network interfaces + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: A standard property. + Must include one of the following: + - "fsl,fman-dtsec" for dTSEC MAC + - "fsl,fman-xgec" for XGEC MAC + - "fsl,fman-memac for mEMAC MAC + +- cell-index + Usage: required + Value type: + Definition: Specifies the MAC id. + + The cell-index value may be used by the FMan or the SoC, to + identify the MAC unit in the FMan (or SoC) memory map. + In the tables bellow there's a description of the cell-index + use, there are two tables, one describes the use of cell-index + by the FMan, the second describes the use by the SoC: + + 1. FMan Registers + + FManV2: + register[bit] MAC cell-index + ============================================================ + FM_EPI[16] XGEC 8 + FM_EPI[16+n] dTSECn n-1 + FM_NPI[11+n] dTSECn n-1 + n = 1,..,5 + + FManV3: + register[bit] MAC cell-index + ============================================================ + FM_EPI[16+n] mEMACn n-1 + FM_EPI[25] mEMAC10 9 + + FM_NPI[11+n] mEMACn n-1 + FM_NPI[10] mEMAC10 9 + FM_NPI[11] mEMAC9 8 + n = 1,..8 + + FM_EPI and FM_NPI are located in the FMan memory map. + + 2. SoC registers: + + - P2041, P3041, P4080 P5020, P5040: + register[bit] FMan MAC cell + Unit index + ============================================================ + DCFG_DEVDISR2[7] 1 XGEC 8 + DCFG_DEVDISR2[7+n] 1 dTSECn n-1 + DCFG_DEVDISR2[15] 2 XGEC 8 + DCFG_DEVDISR2[15+n] 2 dTSECn n-1 + n = 1,..5 + + - T1040, T2080, T4240, B4860: + register[bit] FMan MAC cell + Unit index + ============================================================ + DCFG_CCSR_DEVDISR2[n-1] 1 mEMACn n-1 + DCFG_CCSR_DEVDISR2[11+n] 2 mEMACn n-1 + n = 1,..6,9,10 + + EVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in + the specific SoC "Device Configuration/Pin Control" Memory + Map. + +- reg + Usage: required + Value type: + Definition: A standard property. + +- fsl,fman-ports + Usage: required + Value type: + Definition: An array of two phandles - the first references is + the FMan RX port and the second is the TX port used by this + MAC. + +- ptp-timer + Usage required + Value type: + Definition: A phandle for 1EEE1588 timer. + +EXAMPLE + +fman1_tx28: port@a8000 { + cell-index = <0x28>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xa8000 0x1000>; +}; + +fman1_rx8: port@88000 { + cell-index = <0x8>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x88000 0x1000>; +}; + +ptp-timer: ptp_timer@fe000 { + compatible = "fsl,fman-ptp-timer"; + reg = <0xfe000 0x1000>; +}; + +ethernet@e0000 { + compatible = "fsl,fman-dtsec"; + cell-index = <0>; + reg = <0xe0000 0x1000>; + fsl,fman-ports = <&fman1_rx8 &fman1_tx28>; + ptp-timer = <&ptp-timer>; +}; + +============================================================================ +FMan IEEE 1588 Node + +DESCRIPTION + +The FMan interface to support IEEE 1588 + + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: A standard property. + Must include "fsl,fman-ptp-timer". + +- reg + Usage: required + Value type: + Definition: A standard property. + +EXAMPLE + +ptp-timer@fe000 { + compatible = "fsl,fman-ptp-timer"; + reg = <0xfe000 0x1000>; +}; + +============================================================================= +Example + +fman@400000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <1>; + compatible = "fsl,fman" + ranges = <0 0x400000 0x100000>; + reg = <0x400000 0x100000>; + clocks = <&fman_clk>; + clock-names = "fmanclk"; + interrupts = < + 96 2 0 0 + 16 2 1 1>; + fsl,qman-channel-range = <0x40 0xc>; + + muram@0 { + compatible = "fsl,fman-muram"; + reg = <0x0 0x28000>; + }; + + port@81000 { + cell-index = <1>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x81000 0x1000>; + }; + + port@82000 { + cell-index = <2>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x82000 0x1000>; + }; + + port@83000 { + cell-index = <3>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x83000 0x1000>; + }; + + port@84000 { + cell-index = <4>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x84000 0x1000>; + }; + + port@85000 { + cell-index = <5>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x85000 0x1000>; + }; + + port@86000 { + cell-index = <6>; + compatible = "fsl,fman-v2-port-oh"; + reg = <0x86000 0x1000>; + }; + + fman1_rx_0x8: port@88000 { + cell-index = <0x8>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x88000 0x1000>; + }; + + fman1_rx_0x9: port@89000 { + cell-index = <0x9>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x89000 0x1000>; + }; + + fman1_rx_0xa: port@8a000 { + cell-index = <0xa>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x8a000 0x1000>; + }; + + fman1_rx_0xb: port@8b000 { + cell-index = <0xb>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x8b000 0x1000>; + }; + + fman1_rx_0xc: port@8c000 { + cell-index = <0xc>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x8c000 0x1000>; + }; + + fman1_rx_0x10: port@90000 { + cell-index = <0x10>; + compatible = "fsl,fman-v2-port-rx"; + reg = <0x90000 0x1000>; + }; + + fman1_tx_0x28: port@a8000 { + cell-index = <0x28>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xa8000 0x1000>; + }; + + fman1_tx_0x29: port@a9000 { + cell-index = <0x29>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xa9000 0x1000>; + }; + + fman1_tx_0x2a: port@aa000 { + cell-index = <0x2a>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xaa000 0x1000>; + }; + + fman1_tx_0x2b: port@ab000 { + cell-index = <0x2b>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xab000 0x1000>; + }; + + fman1_tx_0x2c: port@ac0000 { + cell-index = <0x2c>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xac000 0x1000>; + }; + + fman1_tx_0x30: port@b0000 { + cell-index = <0x30>; + compatible = "fsl,fman-v2-port-tx"; + reg = <0xb0000 0x1000>; + }; + + ethernet@e0000 { + compatible = "fsl,fman-dtsec"; + cell-index = <0>; + reg = <0xe0000 0x1000>; + fsl,fman-ports = <&fman1_rx_0x8 &fman1_tx_0x28>; + }; + + ethernet@e2000 { + compatible = "fsl,fman-dtsec"; + cell-index = <1>; + reg = <0xe2000 0x1000>; + fsl,fman-ports = <&fman1_rx_0x9 &fman1_tx_0x29>; + }; + + ethernet@e4000 { + compatible = "fsl,fman-dtsec"; + cell-index = <2>; + reg = <0xe4000 0x1000>; + fsl,fman-ports = <&fman1_rx_0xa &fman1_tx_0x2a>; + }; + + ethernet@e6000 { + compatible = "fsl,fman-dtsec"; + cell-index = <3>; + reg = <0xe6000 0x1000>; + fsl,fman-ports = <&fman1_rx_0xb &fman1_tx_0x2b>; + }; + + ethernet@e8000 { + compatible = "fsl,fman-dtsec"; + cell-index = <4>; + reg = <0xf0000 0x1000>; + fsl,fman-ports = <&fman1_rx_0xc &fman1_tx_0x2c>; + + ethernet@f0000 { + cell-index = <8>; + compatible = "fsl,fman-xgec"; + reg = <0xf0000 0x1000>; + fsl,fman-ports = <&fman1_rx_0x10 &fman1_tx_0x30>; + }; + + ptp-timer@fe000 { + compatible = "fsl,fman-ptp-timer"; + reg = <0xfe000 0x1000>; + }; +}; -- cgit v0.10.2 From e327fff47b6778e56f8e1dbd6493c1b0bbad1ae8 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 6 Nov 2014 20:56:07 -0600 Subject: powerpc/fsl: Update fman dt binding with clock name and qbman link The clock name "fmanclk" was given in the example, but not specified in the binding itself. Made clock-names mandatory as otherwise there's not much point having it. Added a reference to the fsl,qman and fsl,bman properties proposed in http://patchwork.ozlabs.org/patch/407034/ and http://patchwork.ozlabs.org/patch/407035/ Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt index da8e5f2..edeea16 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt @@ -79,12 +79,12 @@ PROPERTIES - clocks Usage: required Value type: - Definition: phandle for fman clock. + Definition: phandle for the fman input clock. - clock-names - usage: optional + usage: required Value type: - Definition: A standard property + Definition: "fmanclk" for the fman input clock. - interrupts Usage: required @@ -104,6 +104,11 @@ PROPERTIES "Work Queue (WQ) Channel Assignments in the QMan" section in DPAA Reference Manual. +- fsl,qman +- fsl,bman + Usage: required + Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt + ============================================================================= FMan MURAM Node -- cgit v0.10.2 From e6a546fd03753ffbd9d2f2883210f8043a8791f7 Mon Sep 17 00:00:00 2001 From: Martijn de Gouw Date: Tue, 5 Aug 2014 15:52:32 +0200 Subject: powerpc/fsl-rio: add support for mapping inbound windows Add support for mapping and unmapping of inbound rapidio windows. This allows for drivers to open up a part of local memory on the rapidio network. Also applications can use this and tranfer blocks of data over the network. Signed-off-by: Martijn de Gouw [scottwood@freescale.com: updated commit message based on review] Signed-off-by: Scott Wood diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index c04b718..08d60f1 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -58,6 +58,19 @@ #define RIO_ISR_AACR 0x10120 #define RIO_ISR_AACR_AA 0x1 /* Accept All ID */ +#define RIWTAR_TRAD_VAL_SHIFT 12 +#define RIWTAR_TRAD_MASK 0x00FFFFFF +#define RIWBAR_BADD_VAL_SHIFT 12 +#define RIWBAR_BADD_MASK 0x003FFFFF +#define RIWAR_ENABLE 0x80000000 +#define RIWAR_TGINT_LOCAL 0x00F00000 +#define RIWAR_RDTYP_NO_SNOOP 0x00040000 +#define RIWAR_RDTYP_SNOOP 0x00050000 +#define RIWAR_WRTYP_NO_SNOOP 0x00004000 +#define RIWAR_WRTYP_SNOOP 0x00005000 +#define RIWAR_WRTYP_ALLOC 0x00006000 +#define RIWAR_SIZE_MASK 0x0000003F + #define __fsl_read_rio_config(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ @@ -266,6 +279,89 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, return 0; } +static void fsl_rio_inbound_mem_init(struct rio_priv *priv) +{ + int i; + + /* close inbound windows */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) + out_be32(&priv->inb_atmu_regs[i].riwar, 0); +} + +int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, + u64 rstart, u32 size, u32 flags) +{ + struct rio_priv *priv = mport->priv; + u32 base_size; + unsigned int base_size_log; + u64 win_start, win_end; + u32 riwar; + int i; + + if ((size & (size - 1)) != 0) + return -EINVAL; + + base_size_log = ilog2(size); + base_size = 1 << base_size_log; + + /* check if addresses are aligned with the window size */ + if (lstart & (base_size - 1)) + return -EINVAL; + if (rstart & (base_size - 1)) + return -EINVAL; + + /* check for conflicting ranges */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + continue; + win_start = ((u64)(in_be32(&priv->inb_atmu_regs[i].riwbar) & RIWBAR_BADD_MASK)) + << RIWBAR_BADD_VAL_SHIFT; + win_end = win_start + ((1 << ((riwar & RIWAR_SIZE_MASK) + 1)) - 1); + if (rstart < win_end && (rstart + size) > win_start) + return -EINVAL; + } + + /* find unused atmu */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + break; + } + if (i >= RIO_INB_ATMU_COUNT) + return -ENOMEM; + + out_be32(&priv->inb_atmu_regs[i].riwtar, lstart >> RIWTAR_TRAD_VAL_SHIFT); + out_be32(&priv->inb_atmu_regs[i].riwbar, rstart >> RIWBAR_BADD_VAL_SHIFT); + out_be32(&priv->inb_atmu_regs[i].riwar, RIWAR_ENABLE | RIWAR_TGINT_LOCAL | + RIWAR_RDTYP_SNOOP | RIWAR_WRTYP_SNOOP | (base_size_log - 1)); + + return 0; +} + +void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart) +{ + u32 win_start_shift, base_start_shift; + struct rio_priv *priv = mport->priv; + u32 riwar, riwtar; + int i; + + /* skip default window */ + base_start_shift = lstart >> RIWTAR_TRAD_VAL_SHIFT; + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + continue; + + riwtar = in_be32(&priv->inb_atmu_regs[i].riwtar); + win_start_shift = riwtar & RIWTAR_TRAD_MASK; + if (win_start_shift == base_start_shift) { + out_be32(&priv->inb_atmu_regs[i].riwar, riwar & ~RIWAR_ENABLE); + return; + } + } +} + void fsl_rio_port_error_handler(int offset) { /*XXX: Error recovery is not implemented, we just clear errors */ @@ -389,6 +485,8 @@ int fsl_rio_setup(struct platform_device *dev) ops->add_outb_message = fsl_add_outb_message; ops->add_inb_buffer = fsl_add_inb_buffer; ops->get_inb_message = fsl_get_inb_message; + ops->map_inb = fsl_map_inb_mem; + ops->unmap_inb = fsl_unmap_inb_mem; rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0); if (!rmu_node) { @@ -602,6 +700,11 @@ int fsl_rio_setup(struct platform_device *dev) RIO_ATMU_REGS_PORT2_OFFSET)); priv->maint_atmu_regs = priv->atmu_regs + 1; + priv->inb_atmu_regs = (struct rio_inb_atmu_regs __iomem *) + (priv->regs_win + + ((i == 0) ? RIO_INB_ATMU_REGS_PORT1_OFFSET : + RIO_INB_ATMU_REGS_PORT2_OFFSET)); + /* Set to receive any dist ID for serial RapidIO controller. */ if (port->phy_type == RIO_PHY_SERIAL) @@ -620,6 +723,7 @@ int fsl_rio_setup(struct platform_device *dev) rio_law_start = range_start; fsl_rio_setup_rmu(port, rmu_np[i]); + fsl_rio_inbound_mem_init(priv); dbell->mport[i] = port; diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h index ae8e274..d53407a 100644 --- a/arch/powerpc/sysdev/fsl_rio.h +++ b/arch/powerpc/sysdev/fsl_rio.h @@ -50,9 +50,12 @@ #define RIO_S_DBELL_REGS_OFFSET 0x13400 #define RIO_S_PW_REGS_OFFSET 0x134e0 #define RIO_ATMU_REGS_DBELL_OFFSET 0x10C40 +#define RIO_INB_ATMU_REGS_PORT1_OFFSET 0x10d60 +#define RIO_INB_ATMU_REGS_PORT2_OFFSET 0x10f60 #define MAX_MSG_UNIT_NUM 2 #define MAX_PORT_NUM 4 +#define RIO_INB_ATMU_COUNT 4 struct rio_atmu_regs { u32 rowtar; @@ -63,6 +66,15 @@ struct rio_atmu_regs { u32 pad2[3]; }; +struct rio_inb_atmu_regs { + u32 riwtar; + u32 pad1; + u32 riwbar; + u32 pad2; + u32 riwar; + u32 pad3[3]; +}; + struct rio_dbell_ring { void *virt; dma_addr_t phys; @@ -99,6 +111,7 @@ struct rio_priv { void __iomem *regs_win; struct rio_atmu_regs __iomem *atmu_regs; struct rio_atmu_regs __iomem *maint_atmu_regs; + struct rio_inb_atmu_regs __iomem *inb_atmu_regs; void __iomem *maint_win; void *rmm_handle; /* RapidIO message manager(unit) Handle */ }; -- cgit v0.10.2 From 9e819963b45f79e87f5a8c44960a66c0727c80e6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 2 Nov 2014 20:02:42 +0530 Subject: powerpc: Disable CPU_FTR_TM if TM is disabled by firmware Firmware is allowed to communicate to us via the "ibm,pa-features" property that TM (Transactional Memory) support is disabled. Currently this doesn't happen on any platform we're aware of, but we should honor it anyway. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6af05fc..6a799b3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -160,6 +160,12 @@ static struct ibm_pa_feature { {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + /* + * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), + * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP + * which is 0 if the kernel doesn't support TM. + */ + {CPU_FTR_TM_COMP, 0, 0, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, -- cgit v0.10.2 From 06743521d0eae1263a09bccb1a92a9fbb94660b3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Nov 2014 21:57:39 +0530 Subject: powerpc/mm: Add missing pmd accessors This patch add documentation and missing accessors. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 7b93568..132ee1d 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -57,7 +57,21 @@ #define pgd_present(pgd) (pgd_val(pgd) != 0) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) #define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd)) + +#ifndef __ASSEMBLY__ + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} +extern struct page *pgd_page(pgd_t pgd); + +#endif /* !__ASSEMBLY__ */ #define pud_offset(pgdp, addr) \ (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h index a56b82f..1de35bbd 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h @@ -38,4 +38,7 @@ /* Bits to mask out from a PGD/PUD to get to the PMD page */ #define PUD_MASKED_BITS 0x1ff +#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) +#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) + #endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index ae153c4..d120924 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -152,7 +152,7 @@ #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ || (pmd_val(pmd) & PMD_BAD_BITS)) -#define pmd_present(pmd) (pmd_val(pmd) != 0) +#define pmd_present(pmd) (!pmd_none(pmd)) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) extern struct page *pmd_page(pmd_t pmd); @@ -164,9 +164,21 @@ extern struct page *pmd_page(pmd_t pmd); #define pud_present(pud) (pud_val(pud) != 0) #define pud_clear(pudp) (pud_val(*(pudp)) = 0) #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) -#define pud_page(pud) virt_to_page(pud_page_vaddr(pud)) +extern struct page *pud_page(pud_t pud); + +static inline pte_t pud_pte(pud_t pud) +{ + return __pte(pud_val(pud)); +} + +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} +#define pud_write(pud) pte_write(pud_pte(pud)) #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) +#define pgd_write(pgd) pte_write(pgd_pte(pgd)) /* * Find an entry in a page-table-directory. We combine the address region @@ -422,7 +434,22 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); - +/* + * + * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs + * page. The hugetlbfs page table walking and mangling paths are totally + * separated form the core VM paths and they're differentiated by + * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. + * + * pmd_trans_huge() is defined as false at build time if + * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build + * time in such case. + * + * For ppc64 we need to differntiate from explicit hugepages from THP, because + * for THP we also track the subpage details at the pmd level. We don't do + * that for explicit huge pages. + * + */ static inline int pmd_trans_huge(pmd_t pmd) { /* @@ -431,16 +458,6 @@ static inline int pmd_trans_huge(pmd_t pmd) return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); } -static inline int pmd_large(pmd_t pmd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - if (pmd_trans_huge(pmd)) - return pmd_val(pmd) & _PAGE_PRESENT; - return 0; -} - static inline int pmd_trans_splitting(pmd_t pmd) { if (pmd_trans_huge(pmd)) @@ -451,6 +468,14 @@ static inline int pmd_trans_splitting(pmd_t pmd) extern int has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline int pmd_large(pmd_t pmd) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pmd_val(pmd) & 0x3) != 0x0); +} + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b460e72..2b8e5ed 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -62,6 +62,9 @@ static unsigned nr_gpages; /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + * + * Defined in such a way that we can optimize away code block at build time + * if CONFIG_HUGETLB_PAGE=n. */ int pmd_huge(pmd_t pmd) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e0c7185..87ff0c1 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -344,16 +345,31 @@ EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); +#ifndef __PAGETABLE_PUD_FOLDED +/* 4 level page table */ +struct page *pgd_page(pgd_t pgd) +{ + if (pgd_huge(pgd)) + return pte_page(pgd_pte(pgd)); + return virt_to_page(pgd_page_vaddr(pgd)); +} +#endif + +struct page *pud_page(pud_t pud) +{ + if (pud_huge(pud)) + return pte_page(pud_pte(pud)); + return virt_to_page(pud_page_vaddr(pud)); +} + /* * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. */ struct page *pmd_page(pmd_t pmd) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(pmd)) + if (pmd_trans_huge(pmd) || pmd_huge(pmd)) return pfn_to_page(pmd_pfn(pmd)); -#endif return virt_to_page(pmd_page_vaddr(pmd)); } -- cgit v0.10.2 From f30c59e921f12b209852e1ddc197dc6a8fb0142b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Nov 2014 21:57:40 +0530 Subject: mm: Update generic gup implementation to handle hugepage directory Update generic gup implementation with powerpc specific details. On powerpc at pmd level we can have hugepte, normal pmd pointer or a pointer to the hugepage directory. Tested-by: Steve Capper Acked-by: Steve Capper Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 26fe1ae..f973fce 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -380,6 +380,7 @@ static inline int hugepd_ok(hugepd_t hpd) #endif #define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6e6d338..e6b62f3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -175,6 +175,52 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, } #endif /* !CONFIG_HUGETLB_PAGE */ +/* + * hugepages at page global directory. If arch support + * hugepages at pgd level, they need to define this. + */ +#ifndef pgd_huge +#define pgd_huge(x) 0 +#endif + +#ifndef pgd_write +static inline int pgd_write(pgd_t pgd) +{ + BUG(); + return 0; +} +#endif + +#ifndef pud_write +static inline int pud_write(pud_t pud) +{ + BUG(); + return 0; +} +#endif + +#ifndef is_hugepd +/* + * Some architectures requires a hugepage directory format that is + * required to support multiple hugepage sizes. For example + * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables" + * introduced the same on powerpc. This allows for a more flexible hugepage + * pagetable layout. + */ +typedef struct { unsigned long pd; } hugepd_t; +#define is_hugepd(hugepd) (0) +#define __hugepd(x) ((hugepd_t) { (x) }) +static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr) +{ + return 0; +} +#else +extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr); +#endif #define HUGETLB_ANON_FILE "anon_hugepage" diff --git a/mm/gup.c b/mm/gup.c index cd62c8c..0ca1df9 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -12,6 +11,7 @@ #include #include +#include #include #include "internal.h" @@ -875,6 +875,49 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 1; } +static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, + unsigned long end, int write, + struct page **pages, int *nr) +{ + int refs; + struct page *head, *page, *tail; + + if (write && !pgd_write(orig)) + return 0; + + refs = 0; + head = pgd_page(orig); + page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -902,6 +945,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pages, nr)) return 0; + } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) { + /* + * architecture have different format for hugetlbfs + * pmd format and THP pmd format + */ + if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, + PMD_SHIFT, next, write, pages, nr)) + return 0; } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); @@ -909,22 +960,26 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(pgdp, addr); + pudp = pud_offset(&pgd, addr); do { pud_t pud = ACCESS_ONCE(*pudp); next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (pud_huge(pud)) { + if (unlikely(pud_huge(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, write, - pages, nr)) + pages, nr)) + return 0; + } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { + if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, + PUD_SHIFT, next, write, pages, nr)) return 0; } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) return 0; @@ -970,10 +1025,20 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, local_irq_save(flags); pgdp = pgd_offset(mm, addr); do { + pgd_t pgd = ACCESS_ONCE(*pgdp); + next = pgd_addr_end(addr, end); - if (pgd_none(*pgdp)) + if (pgd_none(pgd)) break; - else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr)) + if (unlikely(pgd_huge(pgd))) { + if (!gup_huge_pgd(pgd, pgdp, addr, next, write, + pages, &nr)) + break; + } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { + if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, + PGDIR_SHIFT, next, write, pages, &nr)) + break; + } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); -- cgit v0.10.2 From b30e759072c182538abb6908681cfd49978ba5e2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Nov 2014 21:57:41 +0530 Subject: powerpc/mm: Switch to generic RCU get_user_pages_fast This patch switch the ppc arch to use the generic RCU based gup implementation. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 98e9c54..4622733 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -149,6 +149,7 @@ config PPC select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM + select HAVE_GENERIC_RCU_GUP config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 766b77d..1d53a65 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd) #endif /* CONFIG_PPC_BOOK3S_64 */ -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { /* @@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, */ unsigned long idx = 0; - pte_t *dir = hugepd_page(*hpdp); + pte_t *dir = hugepd_page(hpd); #ifndef CONFIG_PPC_FSL_BOOK3E - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); #endif return dir + idx; @@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, } #define hugepd_shift(x) 0 -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { return 0; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f973fce..69c0598 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd) } #endif -#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define is_hugepd(hpd) (hugepd_ok(hpd)) #define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 #define pgd_huge(pgd) 0 #endif /* CONFIG_HUGETLB_PAGE */ +#define __hugepd(x) ((hugepd_t) { (x) }) struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index d120924..5600e43 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 316f9a5..a8805fe 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -274,11 +274,9 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); - extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); + unsigned long end, int write, + struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #define has_transparent_hugepage() 0 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 325e861..438dcd3 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c deleted file mode 100644 index d874668..0000000 --- a/arch/powerpc/mm/gup.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Lockless get_user_pages_fast for powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -#ifdef __HAVE_ARCH_PTE_SPECIAL - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - result = _PAGE_PRESENT|_PAGE_USER; - if (write) - result |= _PAGE_RW; - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - pte_t pte = ACCESS_ONCE(*ptep); - struct page *page; - /* - * Similar to the PMD case, NUMA hinting must take slow path - */ - if (pte_numa(pte)) - return 0; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - if (!page_cache_get_speculative(page)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(page); - return 0; - } - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = ACCESS_ONCE(*pmdp); - - next = pmd_addr_end(addr, end); - /* - * If we find a splitting transparent hugepage we - * return zero. That will result in taking the slow - * path which will call wait_split_huge_page() - * if the pmd is still in splitting state - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (pmd_huge(pmd) || pmd_large(pmd)) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_numa(pmd)) - return 0; - - if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pmdp)) { - if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = ACCESS_ONCE(*pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (pud_huge(pud)) { - if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pudp)) { - if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) - return 0; - - pr_devel(" aligned: %lx .. %lx\n", start, end); - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on powerpc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = ACCESS_ONCE(*pgdp); - - pr_devel(" %016lx: normal pgd %p\n", addr, - (void *)pgd_val(pgd)); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (pgd_huge(pgd)) { - if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, - write, pages, &nr)) - break; - } else if (is_hugepd(pgdp)) { - if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, - addr, next, write, pages, &nr)) - break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - int nr, ret; - - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; - - if (nr < nr_pages) { - pr_devel(" slow path ! nr = %d\n", nr); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - } - - return ret; -} - -#endif /* __HAVE_ARCH_PTE_SPECIAL */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 2b8e5ed..af56de8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #else @@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #endif @@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (!is_hugepd(pmd)) { + if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { /* * if it is not hugepd pointer, we should already find * it cleared. @@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, do { pud = pud_offset(pgd, addr); next = pud_addr_end(addr, end); - if (!is_hugepd(pud)) { + if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) continue; hugetlb_free_pmd_range(tlb, pud, addr, next, floor, @@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); - if (!is_hugepd(pgd)) { + if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); @@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? __boundary : end; } -int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, - unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, + unsigned long end, int write, struct page **pages, int *nr) { pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(*hugepd); + unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); @@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pgd_huge(pgd)) { ret_pte = (pte_t *) pgdp; goto out; - } else if (is_hugepd(&pgd)) + } else if (is_hugepd(__hugepd(pgd_val(pgd)))) hpdp = (hugepd_t *)&pgd; else { /* @@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pud_huge(pud)) { ret_pte = (pte_t *) pudp; goto out; - } else if (is_hugepd(&pud)) + } else if (is_hugepd(__hugepd(pud_val(pud)))) hpdp = (hugepd_t *)&pud; else { pdshift = PMD_SHIFT; @@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (pmd_huge(pmd) || pmd_large(pmd)) { ret_pte = (pte_t *) pmdp; goto out; - } else if (is_hugepd(&pmd)) + } else if (is_hugepd(__hugepd(pmd_val(pmd)))) hpdp = (hugepd_t *)&pmd; else return pte_offset_kernel(&pmd, ea); @@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (!hpdp) return NULL; - ret_pte = hugepte_offset(hpdp, ea, pdshift); + ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: if (shift) @@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if ((pte_val(pte) & mask) != mask) return 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* - * check for splitting here - */ - if (pmd_trans_splitting(pte_pmd(pte))) - return 0; -#endif - /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); -- cgit v0.10.2 From 1665c4a892983ae5c0efa6ef88c00266e3537717 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:04 +1100 Subject: powerpc/powernv: Check PHB type in advance The patch checks PHB type a bit early to save a bit cycles for P7 because we don't support M64 for P7IOC no matter what OPAL firmware we have. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 468a0f2..7ab1dd7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -313,6 +313,12 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) const u32 *r; u64 pci_addr; + /* FIXME: Support M64 for P7IOC */ + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { pr_info(" Firmware too old to support M64 window\n"); return; @@ -325,12 +331,6 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) return; } - /* FIXME: Support M64 for P7IOC */ - if (phb->type != PNV_PHB_IODA2) { - pr_info(" Not support M64 window\n"); - return; - } - res = &hose->mem_resources[1]; res->start = of_translate_address(dn, r + 2); res->end = res->start + of_read_number(r + 4, 2) - 1; -- cgit v0.10.2 From 9e9e8935215d164cea2f49a7679cc0663c2253b6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:05 +1100 Subject: powerpc/powernv: Fix condition to remove M64 The M64 resource should be removed if we don't have hook to initialize it, or (not and) fail to do that. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7ab1dd7..7aa0404 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2000,8 +2000,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); } - /* Configure M64 window */ - if (phb->init_m64 && phb->init_m64(phb)) + /* Remove M64 resource if we can't configure it successfully */ + if (!phb->init_m64 || phb->init_m64(phb)) hose->mem_resources[1].flags = 0; } -- cgit v0.10.2 From 5ef73567813cddabe5fd1105e915be0851820f4f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:06 +1100 Subject: powerpc/powernv: Rename alloc_m64_pe() to reserve_m64_pe() The patch renames alloc_m64_pe() to reserve_m64_pe() to reflect its real usage: We reserve PE numbers for M64 segments in advance and then pick up the reserved PE numbers when building the mapping between PE numbers and M64 segments. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7aa0404..cf90cce 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -172,7 +172,7 @@ fail: return -EIO; } -static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) +static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) { resource_size_t sgsz = phb->ioda.m64_segsize; struct pci_dev *pdev; @@ -345,7 +345,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) /* Use last M64 BAR to cover M64 window */ phb->ioda.m64_bar_idx = 15; phb->init_m64 = pnv_ioda2_init_m64; - phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe; + phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe; phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; } @@ -837,8 +837,8 @@ static void pnv_pci_ioda_setup_PEs(void) phb = hose->private_data; /* M64 layout might affect PE allocation */ - if (phb->alloc_m64_pe) - phb->alloc_m64_pe(phb); + if (phb->reserve_m64_pe) + phb->reserve_m64_pe(phb); pnv_ioda_setup_PEs(hose->bus); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 34d29eb..6c02ff8 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -130,7 +130,7 @@ struct pnv_phb { u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); int (*init_m64)(struct pnv_phb *phb); - void (*alloc_m64_pe)(struct pnv_phb *phb); + void (*reserve_m64_pe)(struct pnv_phb *phb); int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); -- cgit v0.10.2 From 4b82ab18033a7e8434e568a6485e6e5c2e5adb02 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:07 +1100 Subject: powerpc/powernv: Initialize M64 PE in time The patch initializes PE instance when reserving PE number to keep consistent things as we did before. Also, it replaces the iteration on bridge's windows with the prefered way. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cf90cce..7a7a688 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -91,6 +91,24 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) +{ + if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) { + pr_warn("%s: Invalid PE %d on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) { + pr_warn("%s: PE %d was assigned on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + phb->ioda.pe_array[pe_no].phb = phb; + phb->ioda.pe_array[pe_no].pe_number = pe_no; +} + static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -185,16 +203,15 @@ static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) * instead of root bus. */ list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { - for (i = PCI_BRIDGE_RESOURCES; - i <= PCI_BRIDGE_RESOURCE_END; i++) { - r = &pdev->resource[i]; + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { + r = &pdev->resource[PCI_BRIDGE_RESOURCES + i]; if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) continue; base = (r->start - phb->ioda.m64_base) / sgsz; for (step = 0; step < resource_size(r) / sgsz; step++) - set_bit(base + step, phb->ioda.pe_alloc); + pnv_ioda_reserve_pe(phb, base + step); } } } @@ -287,8 +304,6 @@ done: while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < phb->ioda.total_pe) { pe = &phb->ioda.pe_array[i]; - pe->phb = phb; - pe->pe_number = i; if (!master_pe) { pe->flags |= PNV_IODA_PE_MASTER; -- cgit v0.10.2 From b131a8425c34b41ab280edc211fb026451d43264 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:08 +1100 Subject: powerpc/powernv: Set PELTV for compound PEs Commit 262af55 ("powerpc/powernv: Enable M64 aperatus for PHB3") introduced compound PEs in order to support M64 aperatus on PHB3. However, we never configured PELTV for compound PEs. The patch fixes that by: parent PE can freeze all child compound PEs. Any compound PE affects the group. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7a7a688..b96ba48 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -522,6 +522,106 @@ static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) } #endif /* CONFIG_PCI_MSI */ +static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *parent, + struct pnv_ioda_pe *child, + bool is_add) +{ + const char *desc = is_add ? "adding" : "removing"; + uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN : + OPAL_REMOVE_PE_FROM_DOMAIN; + struct pnv_ioda_pe *slave; + long rc; + + /* Parent PE affects child PE */ + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + child->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(child, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + + if (!(child->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Compound case: parent PE affects slave PEs */ + list_for_each_entry(slave, &child->slaves, list) { + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + slave->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(slave, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + } + + return 0; +} + +static int pnv_ioda_set_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + bool is_add) +{ + struct pnv_ioda_pe *slave; + struct pci_dev *pdev; + int ret; + + /* + * Clear PE frozen state. If it's master PE, we need + * clear slave PE frozen state as well. + */ + if (is_add) { + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) + opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + } + } + + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ + ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add); + if (ret) + return ret; + + /* For compound PEs, any one affects all of them */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) { + ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add); + if (ret) + return ret; + } + } + + if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) + pdev = pe->pbus->self; + else + pdev = pe->pdev->bus->self; + while (pdev) { + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *parent; + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + parent = &phb->ioda.pe_array[pdn->pe_number]; + ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add); + if (ret) + return ret; + } + + pdev = pdev->bus->self; + } + + return 0; +} + static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -576,23 +676,9 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return -ENXIO; } - rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, - pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); - if (rc) - pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); - opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + /* Configure PELTV */ + pnv_ioda_set_peltv(phb, pe, true); - /* Add to all parents PELT-V */ - while (parent) { - struct pci_dn *pdn = pci_get_pdn(parent); - if (pdn && pdn->pe_number != IODA_INVALID_PE) { - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, - pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); - /* XXX What to do in case of error ? */ - } - parent = parent->bus->self; - } /* Setup reverse map */ for (rid = pe->rid; rid < rid_end; rid++) phb->ioda.pe_rmap[rid] = pe->pe_number; -- cgit v0.10.2 From 4773f76b61e5cbf41ad514978d9338d8a7ed49e1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:09 +1100 Subject: powerpc/powernv: Simplify pnv_ioda_configure_pe() Nested if statements are always bad and the patch avoids one by checking PHB type and bail in advance if necessary. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b96ba48..762ca14 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -684,26 +684,28 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) phb->ioda.pe_rmap[rid] = pe->pe_number; /* Setup one MVTs on IODA1 */ - if (phb->type == PNV_PHB_IODA1) { - pe->mve_number = pe->pe_number; - rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, - pe->pe_number); + if (phb->type != PNV_PHB_IODA1) { + pe->mve_number = 0; + goto out; + } + + pe->mve_number = pe->pe_number; + rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number); + if (rc != OPAL_SUCCESS) { + pe_err(pe, "OPAL error %ld setting up MVE %d\n", + rc, pe->mve_number); + pe->mve_number = -1; + } else { + rc = opal_pci_set_mve_enable(phb->opal_id, + pe->mve_number, OPAL_ENABLE_MVE); if (rc) { - pe_err(pe, "OPAL error %ld setting up MVE %d\n", + pe_err(pe, "OPAL error %ld enabling MVE %d\n", rc, pe->mve_number); pe->mve_number = -1; - } else { - rc = opal_pci_set_mve_enable(phb->opal_id, - pe->mve_number, OPAL_ENABLE_MVE); - if (rc) { - pe_err(pe, "OPAL error %ld enabling MVE %d\n", - rc, pe->mve_number); - pe->mve_number = -1; - } } - } else if (phb->type == PNV_PHB_IODA2) - pe->mve_number = 0; + } +out: return 0; } -- cgit v0.10.2 From ec8e4e9d3d83b7a973477c2cb4831511b0eec06e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:10 +1100 Subject: powerpc/powernv: Bail upon invalid master PE When freezing compound PEs in pnv_ioda_freeze_pe(), we should bail upon illegal master PE. We needn't freeze slave PE because it should have been put into frozen state by hardware. Reported-by: Anton Blanchard Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 762ca14..d035035 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -373,7 +373,9 @@ static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) /* Fetch master PE */ if (pe->flags & PNV_IODA_PE_SLAVE) { pe = pe->master; - WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER))) + return; + pe_no = pe->pe_number; } -- cgit v0.10.2 From cf2b1e0eb0c281122b001cc879ca0118bff71255 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Nov 2014 13:36:11 +1100 Subject: powerpc/powernv: Fix potential zero devisor If there're no PHBs under P5IOC2 HUB device tree node, we should bail early to avoid zero devisor and allocating TCE tables. Reported-by: Anton Blanchard Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 94ce348..3336fcb 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -196,6 +196,22 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) hub_id = be64_to_cpup(prop64); pr_info(" HUB-ID : 0x%016llx\n", hub_id); + /* Count child PHBs and calculate TCE space per PHB */ + for_each_child_of_node(np, phbn) { + if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || + of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) + phb_count++; + } + + if (phb_count <= 0) { + pr_info(" No PHBs for Hub %s\n", np->full_name); + return; + } + + tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); + pr_info(" Allocating %lld MB of TCE memory per PHB\n", + tce_per_phb >> 20); + /* Currently allocate 16M of TCE memory for every Hub * * XXX TODO: Make it chip local if possible @@ -215,18 +231,6 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) return; } - /* Count child PHBs */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) - phb_count++; - } - - /* Calculate how much TCE space we can give per PHB */ - tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); - pr_info(" Allocating %lld MB of TCE memory per PHB\n", - tce_per_phb >> 20); - /* Initialize PHBs */ for_each_child_of_node(np, phbn) { if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || -- cgit v0.10.2 From 59994fb01a102a448ba758c9b824a29b4a99cc1b Mon Sep 17 00:00:00 2001 From: Vineeth Vijayan Date: Fri, 14 Nov 2014 14:42:05 +0530 Subject: powerpc: Use generic PIE randomization Back in 2009 we merged 501cb16d3cfd "Randomise PIEs", which added support for randomizing PIE (Position Independent Executable) binaries. That commit added randomize_et_dyn(), which correctly randomized the addresses, but failed to honor PF_RANDOMIZE. That means it was not possible to disable PIE randomization via the personality flag, or /proc/sys/kernel/randomize_va_space. Since then there has been generic support for PIE randomization added to binfmt_elf.c, selectable via ARCH_BINFMT_ELF_RANDOMIZE_PIE. Enabling that allows us to drop randomize_et_dyn(), which means we start honoring PF_RANDOMIZE correctly. It also causes a fairly major change to how we layout PIE binaries. Currently we will place the binary at 512MB-520MB for 32 bit binaries, or 512MB-1.5GB for 64 bit binaries, eg: $ cat /proc/$$/maps 4e550000-4e580000 r-xp 00000000 08:02 129813 /bin/dash 4e580000-4e590000 rw-p 00020000 08:02 129813 /bin/dash 10014110000-10014140000 rw-p 00000000 00:00 0 [heap] 3fffaa3f0000-3fffaa5a0000 r-xp 00000000 08:02 921 /lib/powerpc64le-linux-gnu/libc-2.19.so 3fffaa5a0000-3fffaa5b0000 rw-p 001a0000 08:02 921 /lib/powerpc64le-linux-gnu/libc-2.19.so 3fffaa5c0000-3fffaa5d0000 rw-p 00000000 00:00 0 3fffaa5d0000-3fffaa5f0000 r-xp 00000000 00:00 0 [vdso] 3fffaa5f0000-3fffaa620000 r-xp 00000000 08:02 1246 /lib/powerpc64le-linux-gnu/ld-2.19.so 3fffaa620000-3fffaa630000 rw-p 00020000 08:02 1246 /lib/powerpc64le-linux-gnu/ld-2.19.so 3ffffc340000-3ffffc370000 rw-p 00000000 00:00 0 [stack] With this commit applied we don't do any special randomisation for the binary, and instead rely on mmap randomisation. This means the binary ends up at high addresses, eg: $ cat /proc/$$/maps 3fff99820000-3fff999d0000 r-xp 00000000 08:02 921 /lib/powerpc64le-linux-gnu/libc-2.19.so 3fff999d0000-3fff999e0000 rw-p 001a0000 08:02 921 /lib/powerpc64le-linux-gnu/libc-2.19.so 3fff999f0000-3fff99a00000 rw-p 00000000 00:00 0 3fff99a00000-3fff99a20000 r-xp 00000000 00:00 0 [vdso] 3fff99a20000-3fff99a50000 r-xp 00000000 08:02 1246 /lib/powerpc64le-linux-gnu/ld-2.19.so 3fff99a50000-3fff99a60000 rw-p 00020000 08:02 1246 /lib/powerpc64le-linux-gnu/ld-2.19.so 3fff99a60000-3fff99a90000 r-xp 00000000 08:02 129813 /bin/dash 3fff99a90000-3fff99aa0000 rw-p 00020000 08:02 129813 /bin/dash 3fffc3de0000-3fffc3e10000 rw-p 00000000 00:00 0 [stack] 3fffc55e0000-3fffc5610000 rw-p 00000000 00:00 0 [heap] Although this should be OK, it's possible it might break badly written binaries that make assumptions about the address space layout. Signed-off-by: Vineeth Vijayan [mpe: Rewrite changelog] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4622733..421df5cc 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -88,6 +88,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF + select ARCH_BINFMT_ELF_RANDOMIZE_PIE select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 888d8f3..57d289a 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -28,8 +28,7 @@ the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -extern unsigned long randomize_et_dyn(unsigned long base); -#define ELF_ET_DYN_BASE (randomize_et_dyn(0x20000000)) +#define ELF_ET_DYN_BASE 0x20000000 #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f6b8215..b4cc7be 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1654,12 +1654,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) return ret; } -unsigned long randomize_et_dyn(unsigned long base) -{ - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < base) - return base; - - return ret; -} -- cgit v0.10.2 From 16b1d26e77b142546e2b9b6dc3b5aa5c44ae3b77 Mon Sep 17 00:00:00 2001 From: Neelesh Gupta Date: Tue, 14 Oct 2014 14:08:36 +0530 Subject: rtc/tpo: Driver to support rtc and wakeup on PowerNV platform The patch implements the OPAL rtc driver that binds with the rtc driver subsystem. The driver uses the platform device infrastructure to probe the rtc device and register it to rtc class framework. The 'wakeup' is supported depending upon the property 'has-tpo' present in the OF node. It provides a way to load the generic rtc driver in in the absence of an OPAL driver. The patch also moves the existing OPAL rtc get/set time interfaces to the new driver and exposes the necessary OPAL calls using EXPORT_SYMBOL_GPL. Test results: ------------- Host: [root@tul169p1 ~]# ls -l /sys/class/rtc/ total 0 lrwxrwxrwx 1 root root 0 Oct 14 03:07 rtc0 -> ../../devices/opal-rtc/rtc/rtc0 [root@tul169p1 ~]# cat /sys/devices/opal-rtc/rtc/rtc0/time 08:10:07 [root@tul169p1 ~]# echo `date '+%s' -d '+ 2 minutes'` > /sys/class/rtc/rtc0/wakealarm [root@tul169p1 ~]# cat /sys/class/rtc/rtc0/wakealarm 1413274345 [root@tul169p1 ~]# FSP: $ smgr mfgState standby $ rtim timeofday System time is valid: 2014/10/14 08:12:04.225115 $ smgr mfgState ipling $ CC: devicetree@vger.kernel.org CC: tglx@linutronix.de CC: rtc-linux@googlegroups.com CC: a.zummo@towertech.it Signed-off-by: Neelesh Gupta Signed-off-by: Michael Ellerman diff --git a/Documentation/devicetree/bindings/rtc/rtc-opal.txt b/Documentation/devicetree/bindings/rtc/rtc-opal.txt new file mode 100644 index 0000000..af87e5e --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/rtc-opal.txt @@ -0,0 +1,16 @@ +IBM OPAL real-time clock +------------------------ + +Required properties: +- comapatible: Should be "ibm,opal-rtc" + +Optional properties: +- has-tpo: Decides if the wakeup is supported or not. + +Example: + rtc { + compatible = "ibm,opal-rtc"; + has-tpo; + phandle = <0x10000029>; + linux,phandle = <0x10000029>; + }; diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 5d073e5..60250e2 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -154,6 +154,8 @@ struct opal_sg_list { #define OPAL_HANDLE_HMI 98 #define OPAL_REGISTER_DUMP_REGION 101 #define OPAL_UNREGISTER_DUMP_REGION 102 +#define OPAL_WRITE_TPO 103 +#define OPAL_READ_TPO 104 #define OPAL_IPMI_SEND 107 #define OPAL_IPMI_RECV 108 @@ -832,6 +834,9 @@ int64_t opal_rtc_read(__be32 *year_month_day, __be64 *hour_minute_second_millisecond); int64_t opal_rtc_write(uint32_t year_month_day, uint64_t hour_minute_second_millisecond); +int64_t opal_tpo_read(uint64_t token, __be32 *year_mon_day, __be32 *hour_min); +int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day, + uint32_t hour_min); int64_t opal_cec_power_down(uint64_t request); int64_t opal_cec_reboot(void); int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset); @@ -1009,8 +1014,6 @@ extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg); extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data); struct rtc_time; -extern int opal_set_rtc_time(struct rtc_time *tm); -extern void opal_get_rtc_time(struct rtc_time *tm); extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); extern void opal_flash_init(void); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9f8ea61..fa7c4f1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -989,6 +989,7 @@ void GregorianDay(struct rtc_time * tm) tm->tm_wday = day % 7; } +EXPORT_SYMBOL_GPL(GregorianDay); void to_tm(int tim, struct rtc_time * tm) { diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index e462ab9..693b6cd 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -71,6 +71,7 @@ int opal_async_get_token_interruptible(void) return token; } +EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible); int __opal_async_release_token(int token) { @@ -102,6 +103,7 @@ int opal_async_release_token(int token) return 0; } +EXPORT_SYMBOL_GPL(opal_async_release_token); int opal_async_wait_response(uint64_t token, struct opal_msg *msg) { @@ -120,6 +122,7 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg) return 0; } +EXPORT_SYMBOL_GPL(opal_async_wait_response); static int opal_async_comp_event(struct notifier_block *nb, unsigned long msg_type, void *msg) diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 499707d..37dbee1 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -43,7 +45,7 @@ unsigned long __init opal_get_boot_time(void) long rc = OPAL_BUSY; if (!opal_check_token(OPAL_RTC_READ)) - goto out; + return 0; while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); @@ -53,62 +55,33 @@ unsigned long __init opal_get_boot_time(void) mdelay(10); } if (rc != OPAL_SUCCESS) - goto out; + return 0; y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); -out: - ppc_md.get_rtc_time = NULL; - ppc_md.set_rtc_time = NULL; - return 0; } -void opal_get_rtc_time(struct rtc_time *tm) +static __init int opal_time_init(void) { - long rc = OPAL_BUSY; - u32 y_m_d; - u64 h_m_s_ms; - __be32 __y_m_d; - __be64 __h_m_s_ms; + struct platform_device *pdev; + struct device_node *rtc; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); + rtc = of_find_node_by_path("/ibm,opal/rtc"); + if (rtc) { + pdev = of_platform_device_create(rtc, "opal-rtc", NULL); + of_node_put(rtc); + } else { + if (opal_check_token(OPAL_RTC_READ) || + opal_check_token(OPAL_READ_TPO)) + pdev = platform_device_register_simple("opal-rtc", -1, + NULL, 0); else - mdelay(10); + return -ENODEV; } - if (rc != OPAL_SUCCESS) - return; - y_m_d = be32_to_cpu(__y_m_d); - h_m_s_ms = be64_to_cpu(__h_m_s_ms); - opal_to_tm(y_m_d, h_m_s_ms, tm); -} - -int opal_set_rtc_time(struct rtc_time *tm) -{ - long rc = OPAL_BUSY; - u32 y_m_d = 0; - u64 h_m_s_ms = 0; - - y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24; - y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16; - y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8; - y_m_d |= ((u32)bin2bcd(tm->tm_mday)); - - h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56; - h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48; - h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_write(y_m_d, h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); - else - mdelay(10); - } - return rc == OPAL_SUCCESS ? 0 : -EIO; + return PTR_ERR_OR_ZERO(pdev); } +machine_subsys_initcall(powernv, opal_time_init); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 59978af..0a299be 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -250,5 +250,7 @@ OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); +OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); +OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 0297702..a1c37f9 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -819,3 +819,9 @@ void opal_free_sg_list(struct opal_sg_list *sg) sg = NULL; } } + +EXPORT_SYMBOL_GPL(opal_poll_events); +EXPORT_SYMBOL_GPL(opal_rtc_read); +EXPORT_SYMBOL_GPL(opal_rtc_write); +EXPORT_SYMBOL_GPL(opal_tpo_read); +EXPORT_SYMBOL_GPL(opal_tpo_write); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 941831d..30b1c3e 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -265,8 +265,6 @@ static unsigned long pnv_memory_block_size(void) static void __init pnv_setup_machdep_opal(void) { ppc_md.get_boot_time = opal_get_boot_time; - ppc_md.get_rtc_time = opal_get_rtc_time; - ppc_md.set_rtc_time = opal_set_rtc_time; ppc_md.restart = pnv_restart; pm_power_off = pnv_power_off; ppc_md.halt = pnv_halt; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6dd12dd..c8f0ec7 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -979,6 +979,17 @@ config RTC_DRV_NUC900 If you say yes here you get support for the RTC subsystem of the NUC910/NUC920 used in embedded systems. +config RTC_DRV_OPAL + tristate "IBM OPAL RTC driver" + depends on PPC_POWERNV + default y + help + If you say yes here you get support for the PowerNV platform RTC + driver based on OPAL interfaces. + + This driver can also be built as a module. If so, the module + will be called rtc-opal. + comment "on-CPU RTC drivers" config RTC_DRV_DAVINCI diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index b188323..c8ef3e1 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_RTC_DRV_MSM6242) += rtc-msm6242.o obj-$(CONFIG_RTC_DRV_MPC5121) += rtc-mpc5121.o obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o obj-$(CONFIG_RTC_DRV_NUC900) += rtc-nuc900.o +obj-$(CONFIG_RTC_DRV_OPAL) += rtc-opal.o obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o obj-$(CONFIG_RTC_DRV_PALMAS) += rtc-palmas.o obj-$(CONFIG_RTC_DRV_PCAP) += rtc-pcap.o diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c new file mode 100644 index 0000000..95f6521 --- /dev/null +++ b/drivers/rtc/rtc-opal.c @@ -0,0 +1,261 @@ +/* + * IBM OPAL RTC driver + * Copyright (C) 2014 IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + */ + +#define DRVNAME "rtc-opal" +#define pr_fmt(fmt) DRVNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) +{ + tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) + + bcd2bin((y_m_d >> 16) & 0xff)) - 1900; + tm->tm_mon = bcd2bin((y_m_d >> 8) & 0xff) - 1; + tm->tm_mday = bcd2bin(y_m_d & 0xff); + tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff); + tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff); + tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff); + + GregorianDay(tm); +} + +static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms) +{ + *y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24; + *y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16; + *y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8; + *y_m_d |= ((u32)bin2bcd(tm->tm_mday)); + + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56; + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48; + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40; +} + +static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) +{ + long rc = OPAL_BUSY; + u32 y_m_d; + u64 h_m_s_ms; + __be32 __y_m_d; + __be64 __h_m_s_ms; + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + msleep(10); + } + + if (rc != OPAL_SUCCESS) + return -EIO; + + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = be64_to_cpu(__h_m_s_ms); + opal_to_tm(y_m_d, h_m_s_ms, tm); + + return 0; +} + +static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) +{ + long rc = OPAL_BUSY; + u32 y_m_d = 0; + u64 h_m_s_ms = 0; + + tm_to_opal(tm, &y_m_d, &h_m_s_ms); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_rtc_write(y_m_d, h_m_s_ms); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + msleep(10); + } + + return rc == OPAL_SUCCESS ? 0 : -EIO; +} + +/* + * TPO Timed Power-On + * + * TPO get/set OPAL calls care about the hour and min and to make it consistent + * with the rtc utility time conversion functions, we use the 'u64' to store + * its value and perform bit shift by 32 before use.. + */ +static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) +{ + __be32 __y_m_d, __h_m; + struct opal_msg msg; + int rc, token; + u64 h_m_s_ms; + u32 y_m_d; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("Failed to get the async token\n"); + + return token; + } + + rc = opal_tpo_read(token, &__y_m_d, &__h_m); + if (rc != OPAL_ASYNC_COMPLETION) { + rc = -EIO; + goto exit; + } + + rc = opal_async_wait_response(token, &msg); + if (rc) { + rc = -EIO; + goto exit; + } + + rc = be64_to_cpu(msg.params[1]); + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto exit; + } + + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = ((u64)be32_to_cpu(__h_m) << 32); + opal_to_tm(y_m_d, h_m_s_ms, &alarm->time); + +exit: + opal_async_release_token(token); + return rc; +} + +/* Set Timed Power-On */ +static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) +{ + u64 h_m_s_ms = 0, token; + struct opal_msg msg; + u32 y_m_d = 0; + int rc; + + tm_to_opal(&alarm->time, &y_m_d, &h_m_s_ms); + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("Failed to get the async token\n"); + + return token; + } + + /* TPO, we care about hour and minute */ + rc = opal_tpo_write(token, y_m_d, + (u32)((h_m_s_ms >> 32) & 0xffff0000)); + if (rc != OPAL_ASYNC_COMPLETION) { + rc = -EIO; + goto exit; + } + + rc = opal_async_wait_response(token, &msg); + if (rc) { + rc = -EIO; + goto exit; + } + + rc = be64_to_cpu(msg.params[1]); + if (rc != OPAL_SUCCESS) + rc = -EIO; + +exit: + opal_async_release_token(token); + return rc; +} + +static const struct rtc_class_ops opal_rtc_ops = { + .read_time = opal_get_rtc_time, + .set_time = opal_set_rtc_time, + .read_alarm = opal_get_tpo_time, + .set_alarm = opal_set_tpo_time, +}; + +static int opal_rtc_probe(struct platform_device *pdev) +{ + struct rtc_device *rtc; + + if (pdev->dev.of_node && of_get_property(pdev->dev.of_node, "has-tpo", + NULL)) + device_set_wakeup_capable(&pdev->dev, true); + + rtc = devm_rtc_device_register(&pdev->dev, DRVNAME, &opal_rtc_ops, + THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + rtc->uie_unsupported = 1; + + return 0; +} + +static const struct of_device_id opal_rtc_match[] = { + { + .compatible = "ibm,opal-rtc", + }, + { } +}; +MODULE_DEVICE_TABLE(of, opal_rtc_match); + +static const struct platform_device_id opal_rtc_driver_ids[] = { + { + .name = "opal-rtc", + }, + { } +}; +MODULE_DEVICE_TABLE(platform, opal_rtc_driver_ids); + +static struct platform_driver opal_rtc_driver = { + .probe = opal_rtc_probe, + .id_table = opal_rtc_driver_ids, + .driver = { + .name = DRVNAME, + .owner = THIS_MODULE, + .of_match_table = opal_rtc_match, + }, +}; + +static int __init opal_rtc_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return -ENODEV; + + return platform_driver_register(&opal_rtc_driver); +} + +static void __exit opal_rtc_exit(void) +{ + platform_driver_unregister(&opal_rtc_driver); +} + +MODULE_AUTHOR("Neelesh Gupta "); +MODULE_DESCRIPTION("IBM OPAL RTC driver"); +MODULE_LICENSE("GPL"); + +module_init(opal_rtc_init); +module_exit(opal_rtc_exit); -- cgit v0.10.2 From 76f3e2929bb6b476fb02b519ad953e2e29ee7bd5 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Fri, 31 Jan 2014 15:10:14 +0530 Subject: powerpc/config: Enable memory driver As Freescale IFC controller has been moved to driver to driver/memory. So enable memory driver in powerpc config Signed-off-by: Prabhakar Kushwaha Signed-off-by: Scott Wood diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 688e9e4..611efe9 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -144,6 +144,7 @@ CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y CONFIG_UIO=y CONFIG_STAGING=y +CONFIG_MEMORY=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y CONFIG_EXT2_FS=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 6db97e4..be24a18 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -118,6 +118,7 @@ CONFIG_FSL_DMA=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y CONFIG_FSL_CORENET_CF=y +CONFIG_MEMORY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index d2c4154..02395fa 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -215,6 +215,7 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y # CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 8746008..b5d1b82 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -216,6 +216,7 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y # CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -- cgit v0.10.2 From bc78b05bb412fad135715551fc536ca511a3cff2 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Fri, 14 Nov 2014 17:37:50 +1100 Subject: cxl: Return error to PSL if IRQ demultiplexing fails & print clearer warning If an AFU has a hardware bug that causes it to acknowledge a context terminate or remove while that context has outstanding transactions, it is possible for the kernel to receive an interrupt for that context after we have removed it from the context list. The kernel will not be able to demultiplex the interrupt (or worse - if we have already reallocated the process handle we could mis-attribute it to the new context), and printed a big scary warning. It did not acknowledge the interrupt, which would effectively halt further translation fault processing on the PSL. This patch makes the warning clearer about the likely cause of the issue (i.e. hardware bug) to make it obvious to future AFU designers of what needs to be fixed. It also prints out the process handle which can then be matched up with hardware and software traces for debugging. It also acknowledges the interrupt to the PSL with either an address error or acknowledge, so that the PSL can continue with other translations. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 3d2b867..64a4aa3 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -612,7 +612,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr); int cxl_detach_process(struct cxl_context *ctx); -int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info); +int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info); int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); int cxl_check_error(struct cxl_afu *afu); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 336020c..35fcb3d 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -92,20 +92,13 @@ static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 da return IRQ_HANDLED; } -static irqreturn_t cxl_irq(int irq, void *data) +static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) { struct cxl_context *ctx = data; - struct cxl_irq_info irq_info; u64 dsisr, dar; - int result; - if ((result = cxl_get_irq(ctx, &irq_info))) { - WARN(1, "Unable to get CXL IRQ Info: %i\n", result); - return IRQ_HANDLED; - } - - dsisr = irq_info.dsisr; - dar = irq_info.dar; + dsisr = irq_info->dsisr; + dar = irq_info->dar; pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); @@ -149,9 +142,9 @@ static irqreturn_t cxl_irq(int irq, void *data) if (dsisr & CXL_PSL_DSISR_An_UR) pr_devel("CXL interrupt: AURP PTE not found\n"); if (dsisr & CXL_PSL_DSISR_An_PE) - return handle_psl_slice_error(ctx, dsisr, irq_info.errstat); + return handle_psl_slice_error(ctx, dsisr, irq_info->errstat); if (dsisr & CXL_PSL_DSISR_An_AE) { - pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info.afu_err); + pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info->afu_err); if (ctx->pending_afu_err) { /* @@ -163,10 +156,10 @@ static irqreturn_t cxl_irq(int irq, void *data) */ dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " "undelivered to pe %i: %.llx\n", - ctx->pe, irq_info.afu_err); + ctx->pe, irq_info->afu_err); } else { spin_lock(&ctx->lock); - ctx->afu_err = irq_info.afu_err; + ctx->afu_err = irq_info->afu_err; ctx->pending_afu_err = 1; spin_unlock(&ctx->lock); @@ -182,24 +175,43 @@ static irqreturn_t cxl_irq(int irq, void *data) return IRQ_HANDLED; } +static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info) +{ + if (irq_info->dsisr & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + + return IRQ_HANDLED; +} + static irqreturn_t cxl_irq_multiplexed(int irq, void *data) { struct cxl_afu *afu = data; struct cxl_context *ctx; + struct cxl_irq_info irq_info; int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; int ret; + if ((ret = cxl_get_irq(afu, &irq_info))) { + WARN(1, "Unable to get CXL IRQ Info: %i\n", ret); + return fail_psl_irq(afu, &irq_info); + } + rcu_read_lock(); ctx = idr_find(&afu->contexts_idr, ph); if (ctx) { - ret = cxl_irq(irq, ctx); + ret = cxl_irq(irq, ctx, &irq_info); rcu_read_unlock(); return ret; } rcu_read_unlock(); - WARN(1, "Unable to demultiplex CXL PSL IRQ\n"); - return IRQ_HANDLED; + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR" + " %.16llx\n(Possible AFU HW issue - was a term/remove acked" + " with outstanding transactions?)\n", ph, irq_info.dsisr, + irq_info.dar); + return fail_psl_irq(afu, &irq_info); } static irqreturn_t cxl_irq_afu(int irq, void *data) diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index d47532e..9a5a442 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -637,18 +637,18 @@ int cxl_detach_process(struct cxl_context *ctx) return detach_process_native_afu_directed(ctx); } -int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info) +int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; - info->dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An); - info->dar = cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An); - info->dsr = cxl_p2n_read(ctx->afu, CXL_PSL_DSR_An); - pidtid = cxl_p2n_read(ctx->afu, CXL_PSL_PID_TID_An); + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); + pidtid = cxl_p2n_read(afu, CXL_PSL_PID_TID_An); info->pid = pidtid >> 32; info->tid = pidtid & 0xffffffff; - info->afu_err = cxl_p2n_read(ctx->afu, CXL_AFU_ERR_An); - info->errstat = cxl_p2n_read(ctx->afu, CXL_PSL_ErrStat_An); + info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); + info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); return 0; } -- cgit v0.10.2 From 80fa93fce37d3490f4bb0da8a5b239a6745bc744 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 14 Nov 2014 18:09:28 +1100 Subject: cxl: Name interrupts in /proc/interrupt Currently all interrupts generated by cxl are named "cxl". This is not very informative as we can't distinguish between cards, AFUs, error interrupts, user contexts and user interrupts numbers. Being able to distinguish them is useful for setting affinity. This patch gives each of these names in /proc/interrupts. A two card CAPI system, with afu0.0 having 2 active contexts each with 4 user IRQs each, will now look like this: % grep cxl /proc/interrupts 444: 0 OPAL ICS 141312 Level cxl-card1-err 445: 0 OPAL ICS 141313 Level cxl-afu1.0-err 446: 0 OPAL ICS 141314 Level cxl-afu1.0 462: 0 OPAL ICS 2052 Level cxl-afu0.0-pe0-1 463: 75517 OPAL ICS 2053 Level cxl-afu0.0-pe0-2 468: 0 OPAL ICS 2054 Level cxl-afu0.0-pe0-3 469: 0 OPAL ICS 2055 Level cxl-afu0.0-pe0-4 470: 0 OPAL ICS 2056 Level cxl-afu0.0-pe1-1 471: 75506 OPAL ICS 2057 Level cxl-afu0.0-pe1-2 472: 0 OPAL ICS 2058 Level cxl-afu0.0-pe1-3 473: 0 OPAL ICS 2059 Level cxl-afu0.0-pe1-4 502: 1066 OPAL ICS 2050 Level cxl-afu0.0 514: 0 OPAL ICS 2048 Level cxl-card0-err 515: 0 OPAL ICS 2049 Level cxl-afu0.0-err Signed-off-by: Michael Neuling Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 64a4aa3..b5b6bda 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -336,6 +336,8 @@ struct cxl_sste { struct cxl_afu { irq_hw_number_t psl_hwirq; irq_hw_number_t serr_hwirq; + char *err_irq_name; + char *psl_irq_name; unsigned int serr_virq; void __iomem *p1n_mmio; void __iomem *p2n_mmio; @@ -379,6 +381,12 @@ struct cxl_afu { bool enabled; }; + +struct cxl_irq_name { + struct list_head list; + char *name; +}; + /* * This is a cxl context. If the PSL is in dedicated mode, there will be one * of these per AFU. If in AFU directed there can be lots of these. @@ -403,6 +411,7 @@ struct cxl_context { unsigned long *irq_bitmap; /* Accessed from IRQ context */ struct cxl_irq_ranges irqs; + struct list_head irq_names; u64 fault_addr; u64 fault_dsisr; u64 afu_err; @@ -444,6 +453,7 @@ struct cxl { struct dentry *trace; struct dentry *psl_err_chk; struct dentry *debugfs; + char *irq_name; struct bin_attribute cxl_attr; int adapter_num; int user_irqs; @@ -563,9 +573,6 @@ int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode); int cxl_afu_deactivate_mode(struct cxl_afu *afu); int cxl_afu_select_best_mode(struct cxl_afu *afu); -unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, - irq_handler_t handler, void *cookie); -void cxl_unmap_irq(unsigned int virq, void *cookie); int cxl_register_psl_irq(struct cxl_afu *afu); void cxl_release_psl_irq(struct cxl_afu *afu); int cxl_register_psl_err_irq(struct cxl *adapter); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 35fcb3d..c294925 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -255,7 +255,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data) } unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, - irq_handler_t handler, void *cookie) + irq_handler_t handler, void *cookie, const char *name) { unsigned int virq; int result; @@ -271,7 +271,7 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); - result = request_irq(virq, handler, 0, "cxl", cookie); + result = request_irq(virq, handler, 0, name, cookie); if (result) { dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result); return 0; @@ -290,14 +290,15 @@ static int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler, void *cookie, irq_hw_number_t *dest_hwirq, - unsigned int *dest_virq) + unsigned int *dest_virq, + const char *name) { int hwirq, virq; if ((hwirq = cxl_alloc_one_irq(adapter)) < 0) return hwirq; - if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie))) + if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name))) goto err; *dest_hwirq = hwirq; @@ -314,10 +315,19 @@ int cxl_register_psl_err_irq(struct cxl *adapter) { int rc; + adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&adapter->dev)); + if (!adapter->irq_name) + return -ENOMEM; + if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter, &adapter->err_hwirq, - &adapter->err_virq))) + &adapter->err_virq, + adapter->irq_name))) { + kfree(adapter->irq_name); + adapter->irq_name = NULL; return rc; + } cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff); @@ -329,6 +339,7 @@ void cxl_release_psl_err_irq(struct cxl *adapter) cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); cxl_unmap_irq(adapter->err_virq, adapter); cxl_release_one_irq(adapter, adapter->err_hwirq); + kfree(adapter->irq_name); } int cxl_register_serr_irq(struct cxl_afu *afu) @@ -336,10 +347,18 @@ int cxl_register_serr_irq(struct cxl_afu *afu) u64 serr; int rc; + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu, &afu->serr_hwirq, - &afu->serr_virq))) + &afu->serr_virq, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; return rc; + } serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); @@ -353,24 +372,50 @@ void cxl_release_serr_irq(struct cxl_afu *afu) cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); cxl_unmap_irq(afu->serr_virq, afu); cxl_release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); } int cxl_register_psl_irq(struct cxl_afu *afu) { - return cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, - &afu->psl_hwirq, &afu->psl_virq); + int rc; + + afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", + dev_name(&afu->dev)); + if (!afu->psl_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, + &afu->psl_hwirq, &afu->psl_virq, + afu->psl_irq_name))) { + kfree(afu->psl_irq_name); + afu->psl_irq_name = NULL; + } + return rc; } void cxl_release_psl_irq(struct cxl_afu *afu) { cxl_unmap_irq(afu->psl_virq, afu); cxl_release_one_irq(afu->adapter, afu->psl_hwirq); + kfree(afu->psl_irq_name); +} + +void afu_irq_name_free(struct cxl_context *ctx) +{ + struct cxl_irq_name *irq_name, *tmp; + + list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) { + kfree(irq_name->name); + list_del(&irq_name->list); + kfree(irq_name); + } } int afu_register_irqs(struct cxl_context *ctx, u32 count) { irq_hw_number_t hwirq; - int rc, r, i; + int rc, r, i, j = 1; + struct cxl_irq_name *irq_name; if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) return rc; @@ -384,15 +429,47 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) sizeof(*ctx->irq_bitmap), GFP_KERNEL); if (!ctx->irq_bitmap) return -ENOMEM; + + /* + * Allocate names first. If any fail, bail out before allocating + * actual hardware IRQs. + */ + INIT_LIST_HEAD(&ctx->irq_names); + for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + irq_name = kmalloc(sizeof(struct cxl_irq_name), + GFP_KERNEL); + if (!irq_name) + goto out; + irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i", + dev_name(&ctx->afu->dev), + ctx->pe, j); + if (!irq_name->name) { + kfree(irq_name); + goto out; + } + /* Add to tail so next look get the correct order */ + list_add_tail(&irq_name->list, &ctx->irq_names); + j++; + } + } + + /* We've allocated all memory now, so let's do the irq allocations */ + irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); for (r = 1; r < CXL_IRQ_RANGES; r++) { hwirq = ctx->irqs.offset[r]; for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { cxl_map_irq(ctx->afu->adapter, hwirq, - cxl_irq_afu, ctx); + cxl_irq_afu, ctx, irq_name->name); + irq_name = list_next_entry(irq_name, list); } } return 0; + +out: + afu_irq_name_free(ctx); + return -ENOMEM; } void afu_release_irqs(struct cxl_context *ctx) @@ -410,5 +487,6 @@ void afu_release_irqs(struct cxl_context *ctx) } } + afu_irq_name_free(ctx); cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); } -- cgit v0.10.2 From a49ab6eeebe5624d51466969a7bac611231eede8 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 17 Nov 2014 10:52:30 +0800 Subject: powerpc/pseries: Initialise nvram_pstore_info's buf_lock nvram_pstore_info's buf_lock is not initialized before registering, which is clearly incorrect. It causes some strange behavior when trying to obtain the lock during kdump process. On a UP configuration, the console stopped for a couple of seconds, then "lockup suspected" warning printed out, but then it continued to run. So try lock fails, and lockup reported, but then arch_spin_lock() passes. Signed-off-by: Li Zhong [mpe: Edited changelog] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 11a3b61..054a0ed 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -715,6 +715,8 @@ static int nvram_pstore_init(void) nvram_pstore_info.buf = oops_data; nvram_pstore_info.bufsize = oops_data_sz; + spin_lock_init(&nvram_pstore_info.buf_lock); + rc = pstore_register(&nvram_pstore_info); if (rc != 0) pr_err("nvram: pstore_register() failed, defaults to " -- cgit v0.10.2 From e39f223fc93580c86ccf6b3422033e349f57f0dd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 18 Nov 2014 16:47:35 +1100 Subject: powerpc: Remove more traces of bootmem Although we are now selecting NO_BOOTMEM, we still have some traces of bootmem lying around. That is because even with NO_BOOTMEM there is still a shim that converts bootmem calls into memblock calls, but ultimately we want to remove all traces of bootmem. Most of the patch is conversions from alloc_bootmem() to memblock_virt_alloc(). In general a call such as: p = (struct foo *)alloc_bootmem(x); Becomes: p = memblock_virt_alloc(x, 0); We don't need the cast because memblock_virt_alloc() returns a void *. The alignment value of zero tells memblock to use the default alignment, which is SMP_CACHE_BYTES, the same value alloc_bootmem() uses. We remove a number of NULL checks on the result of memblock_virt_alloc(). That is because memblock_virt_alloc() will panic if it can't allocate, in exactly the same way as alloc_bootmem(), so the NULL checks are and always have been redundant. The memory returned by memblock_virt_alloc() is already zeroed, so we remove several memsets of the result of memblock_virt_alloc(). Finally we convert a few uses of __alloc_bootmem(x, y, MAX_DMA_ADDRESS) to just plain memblock_virt_alloc(). We don't use memblock_alloc_base() because MAX_DMA_ADDRESS is ~0ul on powerpc, so limiting the allocation to that is pointless, 16XB ought to be enough for anyone. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index bc2dab5..37d512d 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 432459c..1f793003 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -199,9 +199,7 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256); - if (!of_prop) - return; + of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6e5310d..49f553b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -660,7 +660,7 @@ static void __init emergency_stack_init(void) } /* - * Called into from start_kernel this initializes bootmem, which is used + * Called into from start_kernel this initializes memblock, which is used * to manage page allocation until mem_init is called. */ void __init setup_arch(char **cmdline_p) diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index da22c84..4a6c2cf 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -13,9 +13,7 @@ void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) if (mem_init_done) p = kzalloc(size, mask); else { - p = alloc_bootmem(size); - if (p) - memset(p, 0, size); + p = memblock_virt_alloc(size, 0); } return p; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index af56de8..8c9b811 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -315,7 +315,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) * If gpages can be in highmem we can't use the trick of storing the * data structure in the page; allocate space for this */ - m = alloc_bootmem(sizeof(struct huge_bootmem_page)); + m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0); m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; #else m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 928ebe7..9cba6cb 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -421,12 +421,12 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = alloc_bootmem(CTX_MAP_SIZE); - context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); + context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0); + context_mm = memblock_virt_alloc(sizeof(void *) * (last_context + 1), 0); #ifndef CONFIG_SMP - stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0); #else - stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); register_cpu_notifier(&mmu_context_cpu_nb); #endif diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 2b98a36..3ce70de 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -401,11 +401,11 @@ error: } else { if (config && *config) { size = 256; - free_bootmem(__pa(*config), size); + memblock_free(__pa(*config), size); } if (res && *res) { size = sizeof(struct celleb_pci_resource); - free_bootmem(__pa(*res), size); + memblock_free(__pa(*res), size); } } diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 014d06e6..60b03a1 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -513,11 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) printk(KERN_ERR "nvram: no address\n"); return -EINVAL; } - nvram_image = alloc_bootmem(NVRAM_SIZE); - if (nvram_image == NULL) { - printk(KERN_ERR "nvram: can't allocate ram image\n"); - return -ENOMEM; - } + nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0); nvram_data = ioremap(addr, NVRAM_SIZE*2); nvram_naddrs = 1; /* Make sure we get the correct case */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d035035..cc861c14 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1940,19 +1940,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (!phb) { - pr_err(" Out of memory !\n"); - return; - } + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); /* Allocate PCI controller */ - memset(phb, 0, sizeof(struct pnv_phb)); phb->hose = hose = pcibios_alloc_controller(np); if (!phb->hose) { pr_err(" Can't allocate PCI controller for %s\n", np->full_name); - free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); + memblock_free(__pa(phb), sizeof(struct pnv_phb)); return; } @@ -2019,8 +2014,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); - aux = alloc_bootmem(size); - memset(aux, 0, size); + aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; if (phb->type == PNV_PHB_IODA1) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 3336fcb..6ef6d4d 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -122,12 +122,9 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, return; } - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (phb) { - memset(phb, 0, sizeof(struct pnv_phb)); - phb->hose = pcibios_alloc_controller(np); - } - if (!phb || !phb->hose) { + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + phb->hose = pcibios_alloc_controller(np); + if (!phb->hose) { pr_err(" Failed to allocate PCI controller\n"); return; } @@ -216,12 +213,7 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) * * XXX TODO: Make it chip local if possible */ - tce_mem = __alloc_bootmem(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY, - __pa(MAX_DMA_ADDRESS)); - if (!tce_mem) { - pr_err(" Failed to allocate TCE Memory !\n"); - return; - } + tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); pr_debug(" TCE : 0x%016lx..0x%016lx\n", __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 009a200..799c858 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -125,12 +125,7 @@ static void __init prealloc(struct ps3_prealloc *p) if (!p->size) return; - p->address = __alloc_bootmem(p->size, p->align, __pa(MAX_DMA_ADDRESS)); - if (!p->address) { - printk(KERN_ERR "%s: Cannot allocate %s\n", __func__, - p->name); - return; - } + p->address = memblock_virt_alloc(p->size, p->align); printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, p->address); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index d8484d7..6455c1e 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From b85743ee95f3593ded82bca55cd3c38090ad001f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 14 Nov 2014 10:47:28 +1100 Subject: powerpc/eeh: Refactor eeh_reset_pe() The patch refactors eeh_reset_pe() in order for: * Varied return values for different failure cases. * Replace pr_err() with pr_warn() and print function name. * Coding style cleanup. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 2248a19..967e4a0 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -758,30 +758,37 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) int eeh_reset_pe(struct eeh_pe *pe) { int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - int i, rc; + int i, state, ret; /* Take three shots at resetting the bus */ - for (i=0; i<3; i++) { + for (i = 0; i < 3; i++) { eeh_reset_pe_once(pe); /* * EEH_PE_ISOLATED is expected to be removed after * BAR restore. */ - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((rc & flags) == flags) - return 0; + state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if ((state & flags) == flags) { + ret = 0; + goto out; + } - if (rc < 0) { - pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + if (state < 0) { + pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", __func__, pe->phb->global_number, pe->addr); - return -1; + ret = -ENOTRECOVERABLE; + goto out; } - pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", - i+1, pe->phb->global_number, pe->addr, rc); + + /* We might run out of credits */ + ret = -EIO; + pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", + __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } - return -1; +out: + return ret; } /** -- cgit v0.10.2 From 28bf36f92afc6b22ba50ceaf36ba89afa9f5c1e8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 14 Nov 2014 10:47:29 +1100 Subject: powerpc/eeh: Set EEH_PE_RESET on PE reset The patch introduces additional flag EEH_PE_RESET to indicate the corresponding PE is under reset. In turn, the PE retrieval bakcend on PowerNV platform can return unfrozen state for the EEH core to moving forward. Flag EEH_PE_CFG_BLOCKED isn't the correct one for the purpose. In PCI passthrou case, the problem is more worse: Guest doesn't recover 6th EEH error. The PE is left in isolated (frozen) and config blocked state on Broadcom adapters. We can't retrieve the PE's state correctly any more, even from the host side via sysfs /sys/bus/pci/devices/xxx/eeh_pe_state. Reported-by: Rajeshkumar Subramanian Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index ca07f9c..2e633b4 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -72,6 +72,7 @@ struct device_node; #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ #define EEH_PE_CFG_BLOCKED (1 << 2) /* Block config access */ +#define EEH_PE_RESET (1 << 3) /* PE reset in progress */ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 967e4a0..b372bfd 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -760,6 +760,9 @@ int eeh_reset_pe(struct eeh_pe *pe) int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); int i, state, ret; + /* Mark as reset and block config space */ + eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + /* Take three shots at resetting the bus */ for (i = 0; i < 3; i++) { eeh_reset_pe_once(pe); @@ -788,6 +791,7 @@ int eeh_reset_pe(struct eeh_pe *pe) } out: + eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); return ret; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 6535936..b17e793 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -528,13 +528,11 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Issue reset */ - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); ret = eeh_reset_pe(pe); if (ret) { - eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); return ret; } - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Unfreeze the PE */ ret = eeh_clear_pe_frozen_state(pe, true); @@ -601,19 +599,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); rc = eeh_reset_pe(pe); - if (rc) { - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + if (rc) return rc; - } pci_lock_rescan_remove(); /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index db3803e..fb38fe4 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -372,7 +372,7 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) * moving forward, we have to return operational * state during PE reset. */ - if (pe->state & EEH_PE_CFG_BLOCKED) { + if (pe->state & EEH_PE_RESET) { result = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE | EEH_STATE_MMIO_ENABLED | -- cgit v0.10.2 From b1d76a7d57762332cd8e0c020470d43c5ad3948e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 14 Nov 2014 10:47:30 +1100 Subject: powerpc/eeh: Recover EEH error on ownership change for BCM5719 In PCI passthrou scenario, we need simulate EEH recovery for Emulex adapters when their ownership changes, as we did in commit 5cfb20b96 ("powerpc/eeh: Emulate EEH recovery for VFIO devices"). Broadcom BCM5719 adpaters are facing same problem and needs same cure. Reported-by: Rajeshkumar Subramanian Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index b372bfd..f1c6b11 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1220,6 +1220,7 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) static struct pci_device_id eeh_reset_ids[] = { { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ + { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ { 0 } }; -- cgit v0.10.2 From a450e8f55a57d049ac3afe218f06567e12d6b4f5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Sat, 22 Nov 2014 21:58:09 +1100 Subject: powerpc/eeh: Dump PHB diag-data early On PowerNV platform, PHB diag-data is dumped after stopping device drivers. In case of recursive EEH errors, the kernel is usually crashed before dumping PHB diag-data for the second EEH error. It's hard to locate the root cause of the second EEH error without PHB diag-data. The patch adds one more EEH option "eeh=early_log", which helps dumping PHB diag-data immediately once frozen PE is detected, in order to get the PHB diag-data for the second EEH error. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 2e633b4..0652ebe 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -39,6 +39,7 @@ struct device_node; #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ #define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */ +#define EEH_EARLY_DUMP_LOG 0x20 /* Dump log immediately */ /* * Delay for PE reset, all in ms diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f1c6b11..05be77d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -143,6 +143,8 @@ static int __init eeh_setup(char *str) { if (!strcmp(str, "off")) eeh_add_flag(EEH_FORCE_DISABLED); + else if (!strcmp(str, "early_log")) + eeh_add_flag(EEH_EARLY_DUMP_LOG); return 1; } diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index fb38fe4..2809c98 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -353,6 +353,9 @@ static int ioda_eeh_get_phb_state(struct eeh_pe *pe) } else if (!(pe->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); } return result; @@ -451,6 +454,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) eeh_pe_state_mark(pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); } return result; @@ -730,7 +736,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) { - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); return 0; } @@ -1086,6 +1093,10 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) !((*pe)->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(*pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data((*pe)->phb, + (*pe)->data); } /* -- cgit v0.10.2 From 6d626c5ea3d8411cc2a72d7cabe70f01dfc32d1d Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 24 Nov 2014 21:59:26 +0530 Subject: powerpc/powernv: Cleanup unused MCE definitions/declarations. Cleanup OpalMCE_* definitions/declarations and other related code which is not used anymore. Signed-off-by: Mahesh Salgaonkar Acked-by: Benjamin Herrrenschmidt Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 60250e2..5cd8d2f 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -288,62 +288,6 @@ enum OpalMessageType { OPAL_MSG_TYPE_MAX, }; -/* Machine check related definitions */ -enum OpalMCE_Version { - OpalMCE_V1 = 1, -}; - -enum OpalMCE_Severity { - OpalMCE_SEV_NO_ERROR = 0, - OpalMCE_SEV_WARNING = 1, - OpalMCE_SEV_ERROR_SYNC = 2, - OpalMCE_SEV_FATAL = 3, -}; - -enum OpalMCE_Disposition { - OpalMCE_DISPOSITION_RECOVERED = 0, - OpalMCE_DISPOSITION_NOT_RECOVERED = 1, -}; - -enum OpalMCE_Initiator { - OpalMCE_INITIATOR_UNKNOWN = 0, - OpalMCE_INITIATOR_CPU = 1, -}; - -enum OpalMCE_ErrorType { - OpalMCE_ERROR_TYPE_UNKNOWN = 0, - OpalMCE_ERROR_TYPE_UE = 1, - OpalMCE_ERROR_TYPE_SLB = 2, - OpalMCE_ERROR_TYPE_ERAT = 3, - OpalMCE_ERROR_TYPE_TLB = 4, -}; - -enum OpalMCE_UeErrorType { - OpalMCE_UE_ERROR_INDETERMINATE = 0, - OpalMCE_UE_ERROR_IFETCH = 1, - OpalMCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2, - OpalMCE_UE_ERROR_LOAD_STORE = 3, - OpalMCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4, -}; - -enum OpalMCE_SlbErrorType { - OpalMCE_SLB_ERROR_INDETERMINATE = 0, - OpalMCE_SLB_ERROR_PARITY = 1, - OpalMCE_SLB_ERROR_MULTIHIT = 2, -}; - -enum OpalMCE_EratErrorType { - OpalMCE_ERAT_ERROR_INDETERMINATE = 0, - OpalMCE_ERAT_ERROR_PARITY = 1, - OpalMCE_ERAT_ERROR_MULTIHIT = 2, -}; - -enum OpalMCE_TlbErrorType { - OpalMCE_TLB_ERROR_INDETERMINATE = 0, - OpalMCE_TLB_ERROR_PARITY = 1, - OpalMCE_TLB_ERROR_MULTIHIT = 2, -}; - enum OpalThreadStatus { OPAL_THREAD_INACTIVE = 0x0, OPAL_THREAD_STARTED = 0x1, @@ -467,54 +411,6 @@ struct opal_ipmi_msg { uint8_t data[]; }; -struct opal_machine_check_event { - enum OpalMCE_Version version:8; /* 0x00 */ - uint8_t in_use; /* 0x01 */ - enum OpalMCE_Severity severity:8; /* 0x02 */ - enum OpalMCE_Initiator initiator:8; /* 0x03 */ - enum OpalMCE_ErrorType error_type:8; /* 0x04 */ - enum OpalMCE_Disposition disposition:8; /* 0x05 */ - uint8_t reserved_1[2]; /* 0x06 */ - uint64_t gpr3; /* 0x08 */ - uint64_t srr0; /* 0x10 */ - uint64_t srr1; /* 0x18 */ - union { /* 0x20 */ - struct { - enum OpalMCE_UeErrorType ue_error_type:8; - uint8_t effective_address_provided; - uint8_t physical_address_provided; - uint8_t reserved_1[5]; - uint64_t effective_address; - uint64_t physical_address; - uint8_t reserved_2[8]; - } ue_error; - - struct { - enum OpalMCE_SlbErrorType slb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } slb_error; - - struct { - enum OpalMCE_EratErrorType erat_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } erat_error; - - struct { - enum OpalMCE_TlbErrorType tlb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } tlb_error; - } u; -}; - /* FSP memory errors handling */ enum OpalMemErr_Version { OpalMemErr_V1 = 1, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index a5139ea..24a386c 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -42,7 +42,6 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ #define get_slb_shadow() (get_paca()->slb_shadow_ptr) struct task_struct; -struct opal_machine_check_event; /* * Defines the layout of the paca. @@ -153,12 +152,6 @@ struct paca_struct { u64 tm_scratch; /* TM scratch area for reclaim */ #endif -#ifdef CONFIG_PPC_POWERNV - /* Pointer to OPAL machine check event structure set by the - * early exception handler for use by high level C handler - */ - struct opal_machine_check_event *opal_mc_evt; -#endif #ifdef CONFIG_PPC_BOOK3S_64 /* Exclusive emergency stack pointer for machine check exception. */ void *mc_emergency_sp; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9d7dede..c161ef3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -726,12 +726,5 @@ int main(void) arch.timing_last_enter.tv32.tbl)); #endif -#ifdef CONFIG_PPC_POWERNV - DEFINE(OPAL_MC_GPR3, offsetof(struct opal_machine_check_event, gpr3)); - DEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0)); - DEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1)); - DEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt)); -#endif - return 0; } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a1d45c1..ad62f4d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1312,23 +1312,6 @@ hmi_exception_after_realmode: EXCEPTION_PROLOG_0(PACA_EXGEN) b hmi_exception_hv -#ifdef CONFIG_PPC_POWERNV -_GLOBAL(opal_mc_secondary_handler) - HMT_MEDIUM_PPR_DISCARD - SET_SCRATCH0(r13) - GET_PACA(r13) - clrldi r3,r3,2 - tovirt(r3,r3) - std r3,PACA_OPAL_MC_EVT(r13) - ld r13,OPAL_MC_SRR0(r3) - mtspr SPRN_SRR0,r13 - ld r13,OPAL_MC_SRR1(r3) - mtspr SPRN_SRR1,r13 - ld r3,OPAL_MC_GPR3(r3) - GET_SCRATCH0(r13) - b machine_check_pSeries -#endif /* CONFIG_PPC_POWERNV */ - #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index a1c37f9..cb0b6de 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -50,7 +50,6 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; static unsigned int opal_irq_count; static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); -- cgit v0.10.2 From 221195fb80daa1a0c2fd54a023081c416fe93340 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 25 Nov 2014 17:10:06 +0100 Subject: powerpc: Drop useless warning in eeh_init() This is what we get in dmesg when booting a pseries guest and the hypervisor doesn't provide EEH support. [ 0.166655] EEH functionality not supported [ 0.166778] eeh_init: Failed to call platform init function (-22) Since both powernv_eeh_init() and pseries_eeh_init() already complain when hitting an error, it is not needed to print more (especially such an uninformative message). Signed-off-by: Greg Kurz Acked-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 05be77d..e1b6d8e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -933,11 +933,8 @@ int eeh_init(void) pr_warn("%s: Platform EEH operation not found\n", __func__); return -EEXIST; - } else if ((ret = eeh_ops->init())) { - pr_warn("%s: Failed to call platform init function (%d)\n", - __func__, ret); + } else if ((ret = eeh_ops->init())) return ret; - } /* Initialize EEH event */ ret = eeh_event_init(); -- cgit v0.10.2 From 0ec2698fe1c451606c16e21dbfbd29efce694668 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 3 Nov 2014 20:21:34 +0530 Subject: powerpc/mm: Check for matching hpte without taking hpte lock With smaller hash page table config, we would end up in situation where we would be replacing hash page table slot frequently. In such config, we will find the hpte to be not matching, and we can do that check without holding the hpte lock. We need to recheck the hpte again after holding lock. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index d53288a..558e50b 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -294,8 +294,6 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); - native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -308,16 +306,24 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> miss\n"); ret = -1; } else { - DBG_LOW(" -> hit\n"); - /* Update the HPTE */ - hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C))); + native_lock_hpte(hptep); + /* recheck with locks held */ + hpte_v = be64_to_cpu(hptep->v); + if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || + !(hpte_v & HPTE_V_VALID))) { + ret = -1; + } else { + DBG_LOW(" -> hit\n"); + /* Update the HPTE */ + hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & + ~(HPTE_R_PP | HPTE_R_N)) | + (newpp & (HPTE_R_PP | HPTE_R_N | + HPTE_R_C))); + } + native_unlock_hpte(hptep); } - native_unlock_hpte(hptep); - /* Ensure it is out of the tlb too. */ tlbie(vpn, bpsize, apsize, ssize, local); - return ret; } -- cgit v0.10.2 From 0de3b56b137e92afa6a4581a0bb51633159180d6 Mon Sep 17 00:00:00 2001 From: Jiang Lu Date: Fri, 21 Nov 2014 11:23:19 +0800 Subject: powerpc/oprofile: Disable pagefaults during user stack read A page fault occurred during reading user stack in oprofile backtrace would lead following calltrace: WARNING: at linux/kernel/smp.c:210 Modules linked in: CPU: 5 PID: 736 Comm: sh Tainted: G W 3.14.23-WR7.0.0.0_standard #1 task: c0000000f6208bc0 ti: c00000007c72c000 task.ti: c00000007c72c000 NIP: c0000000000ed6e4 LR: c0000000000ed5b8 CTR: 0000000000000000 REGS: c00000007c72f050 TRAP: 0700 Tainted: G W (3.14.23-WR7.0.0 tandard) MSR: 0000000080021000 CR: 48222482 XER: 00000000 SOFTE: 0 GPR00: c0000000000ed5b8 c00000007c72f2d0 c0000000010aa048 0000000000000005 GPR04: c000000000fdb820 c00000007c72f410 0000000000000001 0000000000000005 GPR08: c0000000010b5768 c000000000f8a048 0000000000000001 0000000000000000 GPR12: 0000000048222482 c00000000fffe580 0000000022222222 0000000010129664 GPR16: 0000000010143cc0 0000000000000000 0000000044444444 0000000000000000 GPR20: c00000007c7221d8 c0000000f638e3c8 000003f15a20120d 0000000000000001 GPR24: 000000005a20120d c00000007c722000 c00000007cdedda8 00003fffef23b160 GPR28: 0000000000000001 c00000007c72f410 c000000000fdb820 0000000000000006 NIP [c0000000000ed6e4] .smp_call_function_single+0x18c/0x248 LR [c0000000000ed5b8] .smp_call_function_single+0x60/0x248 Call Trace: [c00000007c72f2d0] [c0000000000ed5b8] .smp_call_function_single+0x60/0x248 (unreliable) [c00000007c72f3a0] [c000000000030810] .__flush_tlb_page+0x164/0x1b0 [c00000007c72f460] [c00000000002e054] .ptep_set_access_flags+0xb8/0x168 [c00000007c72f500] [c0000000001ad3d8] .handle_mm_fault+0x4a8/0xbac [c00000007c72f5e0] [c000000000bb3238] .do_page_fault+0x3b8/0x868 [c00000007c72f810] [c00000000001e1d0] storage_fault_common+0x20/0x44 Exception: 301 at .__copy_tofrom_user_base+0x54/0x5b0 LR = .op_powerpc_backtrace+0x190/0x20c [c00000007c72fb00] [c000000000a2ec34] .op_powerpc_backtrace+0x204/0x20c (unreliable) [c00000007c72fbc0] [c000000000a2b5fc] .oprofile_add_ext_sample+0xe8/0x118 [c00000007c72fc70] [c000000000a2eee0] .fsl_emb_handle_interrupt+0x20c/0x27c [c00000007c72fd30] [c000000000a2e440] .op_handle_interrupt+0x44/0x58 [c00000007c72fdb0] [c000000000016d68] .performance_monitor_exception+0x74/0x90 [c00000007c72fe30] [c00000000001d8b4] exc_0x260_common+0xfc/0x100 performance_monitor_exception() is executed in a context with interrupt disabled and preemption enabled. When there is a user space page fault happened, do_page_fault() invoke in_atomic() to decide whether kernel should handle such page fault. in_atomic() only check preempt_count. So need call pagefault_disable() to disable preemption before reading user stack. Signed-off-by: Jiang Lu Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6adf55f..ecc66d5 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include @@ -105,6 +105,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) first_frame = 0; } } else { + pagefault_disable(); #ifdef CONFIG_PPC64 if (!is_32bit_task()) { while (depth--) { @@ -113,7 +114,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) break; first_frame = 0; } - + pagefault_enable(); return; } #endif @@ -124,5 +125,6 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) break; first_frame = 0; } + pagefault_enable(); } } -- cgit v0.10.2 From c4f3eb5fc527b749d6fb0d47ffdcfe83706dbe2f Mon Sep 17 00:00:00 2001 From: James Yang Date: Fri, 14 Nov 2014 12:32:24 -0600 Subject: powerpc/mm/hugetlb: Sanity check gigantic hugepage count Limit the number of gigantic hugepages specified by the hugepages= parameter to MAX_NUMBER_GPAGES. Signed-off-by: James Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index af56de8..747e0c6 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -355,6 +355,13 @@ static int __init do_gpage_early_setup(char *param, char *val, if (size != 0) { if (sscanf(val, "%lu", &npages) <= 0) npages = 0; + if (npages > MAX_NUMBER_GPAGES) { + pr_warn("MMU: %lu pages requested for page " + "size %llu KB, limiting to " + __stringify(MAX_NUMBER_GPAGES) "\n", + npages, size / 1024); + npages = MAX_NUMBER_GPAGES; + } gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; size = 0; } -- cgit v0.10.2 From f1581bf14bc40b4a14bf10358eac0b22173b3313 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 2 Nov 2014 21:15:27 +0530 Subject: powerpc/mm/thp: Remove code duplication Rename invalidate_old_hpte to flush_hash_hugepage and use that in other places. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index cd7c271..19550d3 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -127,7 +127,8 @@ static inline void arch_leave_lazy_mmu_mode(void) extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, int local); extern void flush_hash_range(unsigned long number, int local); - +extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f010277..6c2076c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1315,6 +1315,58 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #endif } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + void flush_hash_range(unsigned long number, int local) { if (ppc_md.flush_hash_range) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 5f5e632..1b3ad46 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,57 +18,6 @@ #include #include -static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) -{ - int i, max_hpte_count, valid; - unsigned long s_addr; - unsigned char *hpte_slot_array; - unsigned long hidx, shift, vpn, hash, slot; - - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; - - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } -} - - int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, unsigned int psize) @@ -145,7 +94,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * hash page table entries. */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) - invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, + ssize); } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 87ff0c1..c175c99 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,29 +739,13 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, i; - unsigned long s_addr; - int max_hpte_count; - unsigned int psize, valid; - unsigned char *hpte_slot_array; - unsigned long hidx, vpn, vsid, hash, shift, slot; - - /* - * Flush all the hptes mapping this hugepage - */ - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; + int ssize; + unsigned int psize; + unsigned long vsid; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM - psize = get_slice_psize(mm, s_addr); + psize = get_slice_psize(mm, addr); BUG_ON(psize == MMU_PAGE_16M); #endif if (old_pmd & _PAGE_COMBO) @@ -769,46 +753,16 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, else psize = MMU_PAGE_64K; - if (!is_kernel_addr(s_addr)) { - ssize = user_segment_size(s_addr); - vsid = get_vsid(mm->context.id, s_addr, ssize); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); WARN_ON(vsid == 0); } else { - vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, - hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) -- cgit v0.10.2 From d557b09800dab5dd6804e5b79324069abcf0be11 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 2 Nov 2014 21:15:28 +0530 Subject: powerpc/mm/thp: Use tlbiel if possible If we know that user address space has never executed on other cpus we could use tlbiel. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 15c9150..e5c0919 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -60,7 +60,7 @@ struct machdep_calls { void (*hugepage_invalidate)(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize); + int psize, int ssize, int local); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ void (*hpte_clear_all)(void); diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 19550d3..4d3ecd8 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -128,7 +128,8 @@ extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, int local); extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize); + pmd_t *pmdp, unsigned int psize, int ssize, + int local); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 558e50b..1370091 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -425,7 +425,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, static void native_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i; struct hash_pte *hptep; @@ -471,7 +471,7 @@ static void native_hugepage_invalidate(unsigned long vsid, * instruction compares entry_VA in tlb with the VA specified * here */ - tlbie(vpn, psize, actual_psize, ssize, 0); + tlbie(vpn, psize, actual_psize, ssize, local); } local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 6c2076c..68211d3 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1317,7 +1317,7 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) + pmd_t *pmdp, unsigned int psize, int ssize, int local) { int i, max_hpte_count, valid; unsigned long s_addr; @@ -1334,9 +1334,11 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, if (!hpte_slot_array) return; - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); + if (ppc_md.hugepage_invalidate) { + ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); + goto tm_abort; + } /* * No bluk hpte removal support, invalidate each entry */ @@ -1362,8 +1364,24 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); + MMU_PAGE_16M, ssize, local); + } +tm_abort: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Transactions are not aborted by tlbiel, only tlbie. + * Without, syncing a page back to a block device w/ PIO could pick up + * transactional data (bad!) so we force an abort here. Before the + * sync the page will be made read-only, which will flush_hash_page. + * BIG ISSUE here: if the kernel uses a page from userspace without + * unmapping it first, it may see the speculated version. + */ + if (local && cpu_has_feature(CPU_FTR_TM) && + current->thread.regs && + MSR_TM_ACTIVE(current->thread.regs->msr)) { + tm_enable(); + tm_abort(TM_CAUSE_TLBI); } +#endif } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 1b3ad46..3a648cd 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize); + ssize, local); } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c175c99..eea9fa1 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize; + int ssize, local = 0; unsigned int psize; unsigned long vsid; + const struct cpumask *tmp; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM @@ -762,7 +763,11 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, ssize = mmu_kernel_ssize; } - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize); + tmp = cpumask_of(smp_processor_id()); + if (cpumask_equal(mm_cpumask(mm), tmp)) + local = 1; + + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index d214a01..832f221 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -442,7 +442,7 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i, index = 0; unsigned long s_addr = addr; -- cgit v0.10.2 From 1ad7d70562eeb14df8b6d3e1a0a56f1bdfb990f7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 28 Nov 2014 10:06:42 +0530 Subject: powerpc/xmon: Enable HW instruction breakpoint on POWER8 This patch enables support for hardware instruction breakpoint in xmon on POWER8 platform with the help of a new register called the CIABR (Completed Instruction Address Breakpoint Register). With this patch, a single hardware instruction breakpoint can be added and cleared during any active xmon debug session. The hardware based instruction breakpoint mechanism works correctly with the existing TRAP based instruction breakpoint available on xmon. There are no powerpc CPU with CPU_FTR_IABR feature any more. This patch has re-purposed all the existing IABR related code to work with CIABR register based HW instruction breakpoint. This has one odd feature, which is that when we hit a breakpoint xmon doesn't tell us we have hit the breakpoint. This is because xmon is expecting bp->address == regs->nip. Because CIABR fires on completition regs->nip points to the instruction after the breakpoint. We could fix that, but it would then confuse other parts of the xmon code which think we need to emulate the instruction. [mpe] Signed-off-by: Michael Ellerman Signed-off-by: Anshuman Khandual diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index ef18021..820dc13 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -51,6 +51,12 @@ #include #endif +#if defined(CONFIG_PPC_SPLPAR) +#include +#else +static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; }; +#endif + #include "nonstdio.h" #include "dis-asm.h" @@ -270,6 +276,45 @@ static inline void cinval(void *p) asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p)); } +/** + * write_ciabr() - write the CIABR SPR + * @ciabr: The value to write. + * + * This function writes a value to the CIARB register either directly + * through mtspr instruction if the kernel is in HV privilege mode or + * call a hypervisor function to achieve the same in case the kernel + * is in supervisor privilege mode. + */ +static void write_ciabr(unsigned long ciabr) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + mtspr(SPRN_CIABR, ciabr); + return; + } + plapr_set_ciabr(ciabr); +} + +/** + * set_ciabr() - set the CIABR + * @addr: The value to set. + * + * This function sets the correct privilege value into the the HW + * breakpoint address before writing it up in the CIABR register. + */ +static void set_ciabr(unsigned long addr) +{ + addr &= ~CIABR_PRIV; + + if (cpu_has_feature(CPU_FTR_HVMODE)) + addr |= CIABR_PRIV_HYPER; + else + addr |= CIABR_PRIV_SUPER; + write_ciabr(addr); +} + /* * Disable surveillance (the service processor watchdog function) * while we are in xmon. @@ -764,9 +809,9 @@ static void insert_cpu_bpts(void) brk.len = 8; __set_breakpoint(&brk); } - if (iabr && cpu_has_feature(CPU_FTR_IABR)) - mtspr(SPRN_IABR, iabr->address - | (iabr->enabled & (BP_IABR|BP_IABR_TE))); + + if (iabr) + set_ciabr(iabr->address); } static void remove_bpts(void) @@ -792,8 +837,7 @@ static void remove_bpts(void) static void remove_cpu_bpts(void) { hw_breakpoint_disable(); - if (cpu_has_feature(CPU_FTR_IABR)) - mtspr(SPRN_IABR, 0); + write_ciabr(0); } /* Command interpreting routine */ @@ -1128,7 +1172,7 @@ static char *breakpoint_help_string = "b [cnt] set breakpoint at given instr addr\n" "bc clear all breakpoints\n" "bc clear breakpoint number n or at addr\n" - "bi [cnt] set hardware instr breakpoint (POWER3/RS64 only)\n" + "bi [cnt] set hardware instr breakpoint (POWER8 only)\n" "bd [cnt] set hardware data breakpoint\n" ""; @@ -1167,7 +1211,7 @@ bpt_cmds(void) break; case 'i': /* bi - hardware instr breakpoint */ - if (!cpu_has_feature(CPU_FTR_IABR)) { + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { printf("Hardware instruction breakpoint " "not supported on this cpu\n"); break; -- cgit v0.10.2 From abb90ee7bca5af977b90a0c6be44f631fdacc932 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 1 Dec 2014 16:54:13 +1100 Subject: powerpc/xmon: Cleanup the breakpoint flags Drop BP_IABR_TE, which though used, does not do anything useful. Rename BP_IABR to BP_CIABR. Renumber the flags. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 820dc13..dfe3372 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -94,10 +94,9 @@ struct bpt { }; /* Bits in bpt.enabled */ -#define BP_IABR_TE 1 /* IABR translation enabled */ -#define BP_IABR 2 -#define BP_TRAP 8 -#define BP_DABR 0x10 +#define BP_CIABR 1 +#define BP_TRAP 2 +#define BP_DABR 4 #define NBPTS 256 static struct bpt bpts[NBPTS]; @@ -772,7 +771,7 @@ static void insert_bpts(void) bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { - if ((bp->enabled & (BP_TRAP|BP_IABR)) == 0) + if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0) continue; if (mread(bp->address, &bp->instr[0], 4) != 4) { printf("Couldn't read instruction at %lx, " @@ -787,7 +786,7 @@ static void insert_bpts(void) continue; } store_inst(&bp->instr[0]); - if (bp->enabled & BP_IABR) + if (bp->enabled & BP_CIABR) continue; if (mwrite(bp->address, &bpinstr, 4) != 4) { printf("Couldn't write instruction at %lx, " @@ -822,7 +821,7 @@ static void remove_bpts(void) bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { - if ((bp->enabled & (BP_TRAP|BP_IABR)) != BP_TRAP) + if ((bp->enabled & (BP_TRAP|BP_CIABR)) != BP_TRAP) continue; if (mread(bp->address, &instr, 4) == 4 && instr == bpinstr @@ -1217,7 +1216,7 @@ bpt_cmds(void) break; } if (iabr) { - iabr->enabled &= ~(BP_IABR | BP_IABR_TE); + iabr->enabled &= ~BP_CIABR; iabr = NULL; } if (!scanhex(&a)) @@ -1226,7 +1225,7 @@ bpt_cmds(void) break; bp = new_breakpoint(a); if (bp != NULL) { - bp->enabled |= BP_IABR | BP_IABR_TE; + bp->enabled |= BP_CIABR; iabr = bp; } break; @@ -1283,7 +1282,7 @@ bpt_cmds(void) if (!bp->enabled) continue; printf("%2x %s ", BP_NUM(bp), - (bp->enabled & BP_IABR)? "inst": "trap"); + (bp->enabled & BP_CIABR) ? "inst": "trap"); xmon_print_symbol(bp->address, " ", "\n"); } break; -- cgit v0.10.2 From aefa5688c070727b8729de1aef85cad7b9933fc7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 4 Dec 2014 11:00:14 +0530 Subject: powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault upatepp can get called for a nohpte fault when we find from the linux page table that the translation was hashed before. In that case we are sure that there is no existing translation, hence we could avoid doing tlbie. We could possibly race with a parallel fault filling the TLB. But that should be ok because updatepp is only ever relaxing permissions. We also look at linux pte permission bits when filling hash pte permission bits. We also hold the linux pte busy bits while inserting/updating a hashpte entry, hence a paralle update of linux pte is not possible. On the other hand mprotect involves ptep_modify_prot_start which cause a hpte invalidate and not updatepp. Performance number: We use randbox_access_bench written by Anton. Kernel with THP disabled and smaller hash page table size. 86.60% random_access_b [kernel.kallsyms] [k] .native_hpte_updatepp 2.10% random_access_b random_access_bench [.] doit 1.99% random_access_b [kernel.kallsyms] [k] .do_raw_spin_lock 1.85% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 1.26% random_access_b [kernel.kallsyms] [k] .native_flush_hash_range 1.18% random_access_b [kernel.kallsyms] [k] .__delay 0.69% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 0.37% random_access_b [kernel.kallsyms] [k] .clear_user_page 0.34% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 0.32% random_access_b [kernel.kallsyms] [k] fast_exception_return 0.30% random_access_b [kernel.kallsyms] [k] .hash_page_mm With Fix: 27.54% random_access_b random_access_bench [.] doit 22.90% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 5.76% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 5.20% random_access_b [kernel.kallsyms] [k] fast_exception_return 5.12% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 4.80% random_access_b [kernel.kallsyms] [k] .hash_page_mm 3.31% random_access_b [kernel.kallsyms] [k] data_access_common 1.84% random_access_b [kernel.kallsyms] [k] .trace_hardirqs_on_caller Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e5c0919..c8175a3 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -42,7 +42,7 @@ struct machdep_calls { unsigned long newpp, unsigned long vpn, int bpsize, int apsize, - int ssize, int local); + int ssize, unsigned long flags); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea, int psize, int ssize); diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index aeebc94..4f13c3e 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -316,27 +316,33 @@ static inline unsigned long hpt_hash(unsigned long vpn, return hash & 0x7fffffffffUL; } +#define HPTE_LOCAL_UPDATE 0x1 +#define HPTE_NOHPTE_UPDATE 0x2 + extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize, int subpage_prot); + unsigned long flags, int ssize, int subpage_prot); extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize); + unsigned long flags, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); -extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags); +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize); + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, - int local, int ssize, unsigned int psize); + unsigned long flags, int ssize, unsigned int psize); #else static inline int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, - unsigned long trap, int local, + unsigned long trap, unsigned long flags, int ssize, unsigned int psize) { BUG(); diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 4d3ecd8..23d351c 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -125,11 +125,11 @@ static inline void arch_leave_lazy_mmu_mode(void) extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, - int ssize, int local); + int ssize, unsigned long flags); extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, - int local); + unsigned long flags); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ad62f4d..6213f49 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1565,9 +1565,11 @@ do_hash_page: * r3 contains the faulting address * r4 contains the required access permissions * r5 contains the trap number + * r6 contains dsisr * * at return r3 = 0 for success, 1 for page fault, negative for error */ + ld r6,_DSISR(r1) bl hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 5094f32..463174a 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -46,7 +46,8 @@ /* * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize) + * pte_t *ptep, unsigned long trap, unsigned long flags, + * int ssize) * * Adds a 4K page to the hash table in a segment of 4K pages only */ @@ -298,7 +299,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* Patched by htab_finish_init() */ @@ -338,8 +339,8 @@ htab_pte_insert_failure: *****************************************************************************/ /* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize, - * int subpg_prot) + * pte_t *ptep, unsigned long trap, unsigned local flags, + * int ssize, int subpg_prot) */ /* @@ -594,7 +595,7 @@ htab_inval_old_hpte: li r5,0 /* PTE.hidx */ li r6,MMU_PAGE_64K /* psize */ ld r7,STK_PARAM(R9)(r1) /* ssize */ - ld r8,STK_PARAM(R8)(r1) /* local */ + ld r8,STK_PARAM(R8)(r1) /* flags */ bl flush_hash_page /* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */ lis r0,_PAGE_HPTE_SUB@h @@ -666,7 +667,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ @@ -962,7 +963,7 @@ ht64_modify_pte: li r6,MMU_PAGE_64K /* base page size */ li r7,MMU_PAGE_64K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl ht64_call_hpte_updatepp ht64_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 1370091..9c4880d 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -283,11 +283,11 @@ static long native_hpte_remove(unsigned long hpte_group) static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int bpsize, - int apsize, int ssize, int local) + int apsize, int ssize, unsigned long flags) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; - int ret = 0; + int ret = 0, local = 0; want_v = hpte_encode_avpn(vpn, bpsize, ssize); @@ -322,8 +322,15 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, } native_unlock_hpte(hptep); } - /* Ensure it is out of the tlb too. */ - tlbie(vpn, bpsize, apsize, ssize, local); + + if (flags & HPTE_LOCAL_UPDATE) + local = 1; + /* + * Ensure it is out of the tlb too if it is not a nohpte fault + */ + if (!(flags & HPTE_NOHPTE_UPDATE)) + tlbie(vpn, bpsize, apsize, ssize, local); + return ret; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 68211d3..e56a307 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; @@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; - int rc, user_region = 0, local = 0; + int rc, user_region = 0; int psize, ssize; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", @@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; #ifndef CONFIG_PPC_64K_PAGES /* If we use 4K pages and our psize is not 4K, then we might @@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u if (hugeshift) { if (pmd_trans_huge(*(pmd_t *)ptep)) rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, - trap, local, ssize, psize); + trap, flags, ssize, psize); #ifdef CONFIG_HUGETLB_PAGE else rc = __hash_page_huge(ea, access, vsid, ptep, trap, - local, ssize, hugeshift, psize); + flags, ssize, hugeshift, psize); #else else { /* @@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u #ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ { @@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u rc = -2; else rc = __hash_page_4K(ea, access, vsid, ptep, trap, - local, ssize, spp); + flags, ssize, spp); } /* Dump some info in case of hash insertion failure, they should @@ -1181,14 +1184,19 @@ bail: } EXPORT_SYMBOL_GPL(hash_page_mm); -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr) { + unsigned long flags = 0; struct mm_struct *mm = current->mm; if (REGION_ID(ea) == VMALLOC_REGION_ID) mm = &init_mm; - return hash_page_mm(mm, ea, access, trap); + if (dsisr & DSISR_NOHPTE) + flags |= HPTE_NOHPTE_UPDATE; + + return hash_page_mm(mm, ea, access, trap, flags); } EXPORT_SYMBOL_GPL(hash_page); @@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, pgd_t *pgdir; pte_t *ptep; unsigned long flags; - int rc, ssize, local = 0; + int rc, ssize, update_flags = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1251,16 +1259,17 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Is that local to this CPU ? */ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - local = 1; + update_flags |= HPTE_LOCAL_UPDATE; /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + update_flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(mm, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags, + ssize, subpage_protection(mm, ea)); /* Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do @@ -1278,9 +1287,10 @@ out_exit: * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, - int local) + unsigned long flags) { unsigned long hash, index, shift, hidx, slot; + int local = flags & HPTE_LOCAL_UPDATE; DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { @@ -1317,12 +1327,14 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize, int local) + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags) { int i, max_hpte_count, valid; unsigned long s_addr; unsigned char *hpte_slot_array; unsigned long hidx, shift, vpn, hash, slot; + int local = flags & HPTE_LOCAL_UPDATE; s_addr = addr & HPAGE_PMD_MASK; hpte_slot_array = get_hpte_slot_array(pmdp); diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 3a648cd..8668651 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -19,8 +19,8 @@ #include int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, - pmd_t *pmdp, unsigned long trap, int local, int ssize, - unsigned int psize) + pmd_t *pmdp, unsigned long trap, unsigned long flags, + int ssize, unsigned int psize) { unsigned int index, valid; unsigned char *hpte_slot_array; @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize, local); + ssize, flags); } valid = hpte_valid(hpte_slot_array, index); @@ -108,7 +108,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot += hidx & _PTEIDX_GROUP_IX; ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, local); + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. */ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index a5bcf93..d94b1af 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -19,8 +19,8 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long vflags, int psize, int ssize); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize) + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize) { unsigned long vpn; unsigned long old_pte, new_pte; @@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, local) == -1) + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index eea9fa1..4fe5f64 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, local = 0; + int ssize; unsigned int psize; unsigned long vsid; + unsigned long flags = 0; const struct cpumask *tmp; /* get the base page size,vsid and segment size */ @@ -765,9 +766,9 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, tmp = cpumask_of(smp_processor_id()); if (cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index d4d245c..bee9232 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -186,7 +186,7 @@ static long beat_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; u64 dummy0, dummy1; @@ -369,7 +369,7 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; unsigned long want_v; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index ffcbd24..f7af74f 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -181,7 +181,8 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) return 0; } -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX +extern int hash_page(unsigned long ea, unsigned long access, + unsigned long trap, unsigned long dsisr); //XXX static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) { int ret; @@ -196,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT, 0x300); + ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e45894a..d98f845 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -144,7 +144,7 @@ int spufs_handle_class1(struct spu_context *ctx) access = (_PAGE_PRESENT | _PAGE_USER); access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; local_irq_save(flags); - ret = hash_page(ea, access, 0x300); + ret = hash_page(ea, access, 0x300, dsisr); local_irq_restore(flags); /* hashing failed, so try the actual fault handler */ diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 3e270e3..2f95d33 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -110,7 +110,7 @@ static long ps3_hpte_remove(unsigned long hpte_group) static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { int result; u64 hpte_v, want_v, hpte_rs; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 832f221..469751d 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -284,7 +284,7 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index c99e896..f8684bc 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -133,7 +133,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, { unsigned flt = 0; int result; - unsigned long access, flags; + unsigned long access, flags, inv_flags = 0; if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { pr_devel("copro_handle_mm_fault failed: %#x\n", result); @@ -149,8 +149,12 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, access |= _PAGE_RW; if ((!ctx->kernel) || ~(dar & (1ULL << 63))) access |= _PAGE_USER; + + if (dsisr & DSISR_NOHPTE) + inv_flags |= HPTE_NOHPTE_UPDATE; + local_irq_save(flags); - hash_page_mm(mm, dar, access, 0x300); + hash_page_mm(mm, dar, access, 0x300, inv_flags); local_irq_restore(flags); pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); -- cgit v0.10.2 From 682e77c861c4c60f79ffbeae5e1938ffed24a575 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 5 Dec 2014 10:01:15 +0530 Subject: powerpc/book3s: Fix partial invalidation of TLBs in MCE code. The existing MCE code calls flush_tlb hook with IS=0 (single page) resulting in partial invalidation of TLBs which is not right. This patch fixes that by passing IS=0xc00 to invalidate whole TLB for successful recovery from TLB and ERAT errors. Cc: stable@vger.kernel.org Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index aa9aff3..b6f123a 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -79,7 +79,7 @@ static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) } if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET); /* reset error bits */ dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; } @@ -110,7 +110,7 @@ static long mce_handle_common_ierror(uint64_t srr1) break; case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET); handled = 1; } break; -- cgit v0.10.2 From 56548fc0e86cb9156af7a7e1f15ba78f251dafaf Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 3 Dec 2014 14:48:40 +1100 Subject: powerpc/powernv: Return to cpu offline loop when finished in KVM guest When a secondary hardware thread has finished running a KVM guest, we currently put that thread into nap mode using a nap instruction in the KVM code. This changes the code so that instead of doing a nap instruction directly, we instead cause the call to power7_nap() that put the thread into nap mode to return. The reason for doing this is to avoid having the KVM code having to know what low-power mode to put the thread into. In the case of a secondary thread used to run a KVM guest, the thread will be offline from the point of view of the host kernel, and the relevant power7_nap() call is the one in pnv_smp_cpu_disable(). In this case we don't want to clear pending IPIs in the offline loop in that function, since that might cause us to miss the wakeup for the next time the thread needs to run a guest. To tell whether or not to clear the interrupt, we use the SRR1 value returned from power7_nap(), and check if it indicates an external interrupt. We arrange that the return from power7_nap() when we have finished running a guest returns 0, so pending interrupts don't get flushed in that case. Note that it is important a secondary thread that has finished executing in the guest, or that didn't have a guest to run, should not return to power7_nap's caller while the kvm_hstate.hwthread_req flag in the PACA is non-zero, because the return from power7_nap will reenable the MMU, and the MMU might still be in guest context. In this situation we spin at low priority in real mode waiting for hwthread_req to become zero. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index dda7ac4..29c3798 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -451,7 +451,7 @@ extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -extern void power7_nap(int check_irq); +extern unsigned long power7_nap(int check_irq); extern void power7_sleep(void); extern void flush_instruction_cache(void); extern void hard_reset_now(void); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6213f49..db08382 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -131,6 +131,8 @@ BEGIN_FTR_SECTION 1: #endif + /* Return SRR1 from power7_nap() */ + mfspr r3,SPRN_SRR1 beq cr1,2f b power7_wakeup_noloss 2: b power7_wakeup_loss diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index c0754bb..18c0687 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -212,6 +212,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) mtspr SPRN_SRR0,r5 rfid +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ _GLOBAL(power7_wakeup_loss) ld r1,PACAR1(r13) BEGIN_FTR_SECTION @@ -219,15 +223,19 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_NVGPRS(r1) REST_GPR(2, r1) - ld r3,_CCR(r1) + ld r6,_CCR(r1) ld r4,_MSR(r1) ld r5,_NIP(r1) addi r1,r1,INT_FRAME_SIZE - mtcr r3 + mtcr r6 mtspr SPRN_SRR1,r4 mtspr SPRN_SRR0,r5 rfid +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ _GLOBAL(power7_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index edb2ccd..65c105b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -201,8 +201,6 @@ kvmppc_primary_no_guest: bge kvm_novcpu_exit /* another thread already exiting */ li r3, NAPPING_NOVCPU stb r3, HSTATE_NAPPING(r13) - li r3, 1 - stb r3, HSTATE_HWTHREAD_REQ(r13) b kvm_do_nap @@ -293,6 +291,8 @@ kvm_start_guest: /* if we have no vcpu to run, go back to sleep */ beq kvm_no_guest +kvm_secondary_got_guest: + /* Set HSTATE_DSCR(r13) to something sensible */ ld r6, PACA_DSCR(r13) std r6, HSTATE_DSCR(r13) @@ -318,27 +318,46 @@ kvm_start_guest: stwcx. r3, 0, r4 bne 51b +/* + * At this point we have finished executing in the guest. + * We need to wait for hwthread_req to become zero, since + * we may not turn on the MMU while hwthread_req is non-zero. + * While waiting we also need to check if we get given a vcpu to run. + */ kvm_no_guest: - li r0, KVM_HWTHREAD_IN_NAP + lbz r3, HSTATE_HWTHREAD_REQ(r13) + cmpwi r3, 0 + bne 53f + HMT_MEDIUM + li r0, KVM_HWTHREAD_IN_KERNEL stb r0, HSTATE_HWTHREAD_STATE(r13) -kvm_do_nap: - /* Clear the runlatch bit before napping */ - mfspr r2, SPRN_CTRLF - clrrdi r2, r2, 1 - mtspr SPRN_CTRLT, r2 - + /* need to recheck hwthread_req after a barrier, to avoid race */ + sync + lbz r3, HSTATE_HWTHREAD_REQ(r13) + cmpwi r3, 0 + bne 54f +/* + * We jump to power7_wakeup_loss, which will return to the caller + * of power7_nap in the powernv cpu offline loop. The value we + * put in r3 becomes the return value for power7_nap. + */ li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - isync - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b - nap - b . + li r3, 0 + b power7_wakeup_loss + +53: HMT_LOW + ld r4, HSTATE_KVM_VCPU(r13) + cmpdi r4, 0 + beq kvm_no_guest + HMT_MEDIUM + b kvm_secondary_got_guest + +54: li r0, KVM_HWTHREAD_IN_KVM + stb r0, HSTATE_HWTHREAD_STATE(r13) + b kvm_no_guest /****************************************************************************** * * @@ -2172,6 +2191,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the * runlatch bit before napping. */ +kvm_do_nap: mfspr r2, SPRN_CTRLF clrrdi r2, r2, 1 mtspr SPRN_CTRLT, r2 diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 4753958..b716f66 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -149,6 +149,7 @@ static int pnv_smp_cpu_disable(void) static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; + unsigned long srr1; /* Standard hot unplug procedure */ local_irq_disable(); @@ -165,13 +166,25 @@ static void pnv_smp_cpu_kill_self(void) mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { ppc64_runlatch_off(); - power7_nap(1); + srr1 = power7_nap(1); ppc64_runlatch_on(); - /* Clear the IPI that woke us up */ - icp_native_flush_interrupt(); - local_paca->irq_happened &= PACA_IRQ_HARD_DIS; - mb(); + /* + * If the SRR1 value indicates that we woke up due to + * an external interrupt, then clear the interrupt. + * We clear the interrupt before checking for the + * reason, so as to avoid a race where we wake up for + * some other reason, find nothing and clear the interrupt + * just as some other cpu is sending us an interrupt. + * If we returned from power7_nap as a result of + * having finished executing in a KVM guest, then srr1 + * contains 0. + */ + if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) { + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; + smp_mb(); + } if (cpu_core_split_required()) continue; -- cgit v0.10.2